Sync with trunk. Revision 9165

Li Fanxi 2011-05-08 17:36:50 +08:00
commit 7bd9cd20fe
229 changed files with 105834 additions and 91526 deletions


@ -30,3 +30,4 @@ nbproject/
.project
.pydevproject
.settings/
*.DS_Store


@ -19,6 +19,106 @@
# new recipes:
#   - title:

- version: 0.8.0
  date: 2011-05-06

  new features:
    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
      type: major

- version: 0.7.59
  date: 2011-04-30

  bug fixes:
    - title: "Fixes a bug in 0.7.58 that caused too small fonts when converting to MOBI for the Kindle. Apologies."

    - title: "Apple driver: Handle invalid EPUBs that do not contain an OPF file"

  new recipes:
    - title: The Big Picture and Auto industry news
      author: welovelucy

    - title: Gazeta Prawna
      author: Vroo

    - title: Various Czech news sources
      author: Tomas Latal

    - title: Diario de Ibiza
      author: Joan Tur

- version: 0.7.58
  date: 2011-04-29

  new features:
    - title: "Support for converting and reading metadata from Plucker format PDB files"
      type: major

    - title: "The metadata that is displayed in the book details panel on the right is now completely configurable via Preferences->Look & Feel"

    - title: "Add a column that shows the date when the metadata of a book record was last modified in calibre. To see the column, right click on the column headers in calibre and select Show column->Modified. Note that the dates may be incorrect for books added with older versions of calibre."

    - title: "Add a command line option to shut down a running calibre instance"

    - title: "CHM Input: Store extracted files in the input/ sub-directory for easy debugging when --debug-pipeline is specified"

    - title: "Add a popup menu to the 'Create saved search' button to allow easy deletion of saved searches"

  bug fixes:
    - title: "Fix regression that broke converting to LIT in 0.7.57"
      tickets: [769334]

    - title: "Conversion pipeline: Remove encoding declarations from input HTML documents to guarantee that there is only a single encoding declaration in the output HTML."
      tickets: [773337]

    - title: "Correctly parenthesize searches that are used to make search restrictions"

    - title: "Fix ratings in save to disk templates not being divided by 2"

    - title: "TXT to EPUB: Underlined words (following quotes?) fail to become italics"
      tickets: [772267]

    - title: "Fix template function source code unavailable when not running calibre from source"

    - title: "Fix adding HTML books from the top of a deep folder hierarchy being very slow"

    - title: "Only set language in MOBI metadata if it is not null"

    - title: "Fix 'count-of' searches (e.g., tags:#>3)"
      tickets: [771175]

    - title: "Fix regression that broke connection to iTunes in some cases"
      tickets: [771164]

    - title: "Fix buggy regex that made converting PDFs containing the string ****************** very slow"
      tickets: [770534]

    - title: "Fix Ctrl+L shortcut to lookup word not working in the ebook viewer"
      tickets: [769492]

    - title: "Fix regression that broke searching on boolean columns"

  improved recipes:
    - HBR Blogs
    - The Marker
    - Financial Times
    - Clarin
    - Honolulu Star Advertiser

  new recipes:
    - title: Novi Standard
      author: Darko Miletic

    - title: Autobild.ro and Social Diva
      author: Silviu Cotoara

    - title: Novinky
      author: Tomas Latal

    - title: "De Volkskrant (subscriber version)"
      author: Selcal

- version: 0.7.57
  date: 2011-04-22
16 recipes/auto_blog.recipe Normal file

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AutoBlog(BasicNewsRecipe):
    title = u'Auto Blog'
    __author__ = 'Welovelucy'
    language = 'en'
    description = 'Auto industry news'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'AutoBlog', u'http://www.autoblog.com/rss.xml')]

    def print_version(self, url):
        return url + 'print/'

55 recipes/autobild.recipe Normal file

@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
auto-bild.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AutoBild(BasicNewsRecipe):
    title = u'Auto Bild'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Auto'
    publisher = 'Auto Bild'
    oldest_article = 50
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Auto'
    encoding = 'utf-8'
    cover_url = 'http://www.auto-bild.ro/images/autobild.gif'

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class':'box_2 articol clearfix'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['detail']})
        , dict(name='a', attrs={'id':['zoom_link']})
        , dict(name='div', attrs={'class':['icons clearfix']})
        , dict(name='div', attrs={'class':['pub_articol clearfix']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['pub_articol clearfix']})
    ]

    feeds = [
        (u'Feeds', u'http://www.auto-bild.ro/rss/toate')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe

class BigPicture(BasicNewsRecipe):
    title = u'The Big Picture'
    __author__ = 'Welovelucy'
    description = ('Macro perspective on capital markets, economy, technology'
                   ' and digital media')
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'Big Picture', u'http://feeds.feedburner.com/TheBigPicture')]


@ -3,7 +3,8 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
__version__ = '0.98' # 2011-04-10
__version__ = '0.98'

''' http://brandeins.de - Wirtschaftsmagazin '''
import re
import string
@ -13,8 +14,8 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class BrandEins(BasicNewsRecipe):
    title = u'brand eins'
    __author__ = 'Constantin Hofstetter; Steffen Siebert'
    description = u'Wirtschaftsmagazin: Gets the last full issue on default. Set a integer value for the username-field to get older issues: 1 -> the newest (but not complete) issue, 2 -> the last complete issue (default), 3 -> the issue before 2 etc.'
    __author__ = 'Constantin Hofstetter'
    description = u'Wirtschaftsmagazin'
    publisher = 'brandeins.de'
    category = 'politics, business, wirtschaft, Germany'
    use_embedded_content = False
@ -105,10 +106,11 @@ class BrandEins(BasicNewsRecipe):
        keys = issue_map.keys()
        keys.sort()
        keys.reverse()
        selected_issue = issue_map[keys[issue-1]]
        selected_issue_key = keys[issue - 1]
        selected_issue = issue_map[selected_issue_key]
        url = selected_issue.get('href', False)
        # Get the title for the magazine: build it out of the title of the cover, taking the issue and year
        self.title = "brand eins " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", selected_issue.find('img').get('title', False)).group('date')
        self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
        url = 'http://brandeins.de/' + url

        # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
@ -161,3 +163,4 @@ class BrandEins(BasicNewsRecipe):
            current_articles.append({'title': title, 'url': url, 'description': description, 'date': ''})
        titles_and_articles.append([chapter_title, current_articles])
        return titles_and_articles
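A quick sketch of the new title construction above, assuming the issue keys are YYYYMM strings (the sample key is hypothetical, not from the site):

    selected_issue_key = '201105'  # hypothetical key: year 2011, issue 05
    title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
    print title  # -> brand eins 05/2011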


@ -0,0 +1,55 @@
__license__ = 'GPL v3'
__author__ = 'Joan Tur, based on El Pais version by Jordi Balcells & elargentino.com version by Darko Miletic'
description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
__docformat__ = 'restructuredtext en'

'''
diariodeibiza.es
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DiarioDeIbiza(BasicNewsRecipe):
    __author__ = 'Joan Tur, cullet'
    description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
    cover_url = 'http://estaticos01.diariodeibiza.es//elementosWeb/mediaweb/images/logo.jpg'
    title = u'Diario de Ibiza digital'
    publisher = u'Editorial Prensa Iberica'
    category = 'News, politics, culture, economy, general interest'
    language = 'es'
    encoding = 'iso-8859-1'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 5

    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'class':['noticia_titular','epigrafe','subtitulo','actualizada','noticia_fecha','noticia_texto']}),
        dict(name='font', attrs={'class':['actualizada']})
    ]

    feeds = [
        (u'Portada de Ibiza', u'http://www.diariodeibiza.es/elementosInt/rss/1'),
        (u'Pitiuses i Balears', u'http://www.diariodeibiza.es/elementosInt/rss/2'),
        (u'Opini\xf3n', u'http://www.diariodeibiza.es/elementosInt/rss/3'),
        (u'Nacional', u'http://www.diariodeibiza.es/elementosInt/rss/4'),
        (u'Internacional', u'http://www.diariodeibiza.es/elementosInt/rss/5'),
        (u'Econom\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/6'),
        (u'Deportes', u'http://www.diariodeibiza.es/elementosInt/rss/7'),
        (u'Sociedad', u'http://www.diariodeibiza.es/elementosInt/rss/8'),
        (u'Ciencia', u'http://www.diariodeibiza.es/elementosInt/rss/11'),
        (u'Tecnolog\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/12'),
        (u'Gente', u'http://www.diariodeibiza.es/elementosInt/rss/13'),
        (u'Sucesos', u'http://www.diariodeibiza.es/elementosInt/rss/15'),
        (u'Cultura', u'http://www.diariodeibiza.es/elementosInt/rss/16Piti')
    ]

37 recipes/digizone.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class DigiZoneCZ(BasicNewsRecipe):
    title = 'DigiZone'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z DigiZone.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'iso-8859-2'
    publisher = 'Internet Info s.r.o.'
    category = 'digitalni vysilani, televize, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', u'http://rss.digizone.cz/aktuality'),
        (u'\u010cl\xe1nky', u'http://rss.digizone.cz/clanky')
    ]

    remove_tags_before = dict(id=['p-article','p-actuality'])
    remove_tags_after = dict(id=['p-article','p-actuality'])
    remove_tags = [
        dict(attrs={'class':['path','mth','lbtr','serial','enquiry','links','dp-n','side','op-ab','op-view','op-sub','op-list',]}),
        dict(id=['opinions','discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]


@ -12,7 +12,6 @@ class AdvancedUserRecipe1301860159(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en_EN'
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
    remove_tags = [dict(name='a'), dict(name='hr')]


@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
foxnews.com
'''
@ -23,6 +23,7 @@ class FoxNews(BasicNewsRecipe):
    extra_css = """
        body{font-family: Arial,sans-serif }
        .caption{font-size: x-small}
        .author,.dateline{font-size: small}
    """
    conversion_options = {
@ -34,12 +35,12 @@ class FoxNews(BasicNewsRecipe):
    remove_attributes = ['xmlns','lang']

    remove_tags = [
        dict(name=['object','embed','link','script','iframe','meta','base'])
        ,dict(attrs={'class':['user-control','url-description','ad-context']})
    ]
    remove_tags = [
        dict(attrs={'class':['user-control','logo','ad-300x250','url-description']})
        ,dict(name=['meta','base','link','iframe','object','embed'])
    ]
    remove_tags_before = dict(name='h1')
    keep_only_tags = [dict(attrs={'id':'article-print'})]
    remove_tags_after = dict(attrs={'class':'url-description'})

    feeds = [
@ -55,3 +56,24 @@ class FoxNews(BasicNewsRecipe):
    def print_version(self, url):
        return url + 'print'

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup


@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
__copyright__ = u'2010-2011, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
frazpc.pl
'''
@ -19,17 +19,20 @@ class FrazPC(BasicNewsRecipe):
    use_embedded_content = False
    no_stylesheets = True

    feeds = [(u'Aktualno\u015bci', u'http://www.frazpc.pl/feed'), (u'Recenzje', u'http://www.frazpc.pl/kat/recenzje-2/feed')]
    keep_only_tags = [dict(name='div', attrs={'id':'FRAZ_CONTENT'})]
    remove_tags = [dict(name='p', attrs={'class':'gray tagsP fs11'})]
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [(r'<div id="post-[0-9]*"', lambda match: '<div id="FRAZ_CONTENT"'),
         (r'href="/f/news/', lambda match: 'href="http://www.frazpc.pl/f/news/'),
         (r' &nbsp; <a href="http://www.frazpc.pl/[^>]*?">(Skomentuj|Komentarz(e)?\([0-9]*\))</a>&nbsp; \|', lambda match: '')]
    feeds = [
        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
    ]
    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
    remove_tags = [
        dict(name='div', attrs={'class':'title-wrapper'}),
        dict(name='p', attrs={'class':'tags'}),
        dict(name='p', attrs={'class':'article-links'}),
        dict(name='div', attrs={'class':'comments_box'})
    ]
    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
    remove_attributes = ['width', 'height']


@ -0,0 +1,53 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Vroo <vroobelek@iq.pl>'
__author__ = u'Vroo'
'''
gazetaprawna.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe

class gazetaprawna(BasicNewsRecipe):
    version = 1
    title = u'Gazeta Prawna'
    __author__ = u'Vroo'
    publisher = u'Infor Biznes'
    oldest_article = 7
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True
    description = 'Polski dziennik gospodarczy'
    language = 'pl'
    encoding = 'utf-8'

    remove_tags_after = [
        dict(name='div', attrs={'class':['data-art']})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['dodatki_artykulu','data-art']})
    ]

    feeds = [
        (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'),
        (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'),
        (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'),
        (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'),
        (u'Podatki i rachunkowo\u015b\u0107', u'http://podatki.gazetaprawna.pl/rss.xml')
    ]

    def print_version(self, url):
        url = url.replace('wiadomosci/artykuly', 'drukowanie')
        url = url.replace('artykuly', 'drukowanie')
        url = url.replace('porady', 'drukowanie')
        url = url.replace('wywiady', 'drukowanie')
        url = url.replace('orzeczenia', 'drukowanie')
        url = url.replace('galeria', 'drukowanie')
        url = url.replace('komentarze', 'drukowanie')
        url = url.replace('biznes.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('podatki.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('praca.gazetaprawna', 'www.gazetaprawna')
        return url
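A quick sketch of what print_version above does to a section URL; the article address is made up for illustration:

    url = 'http://biznes.gazetaprawna.pl/artykuly/508501,przyklad.html'  # hypothetical
    url = url.replace('artykuly', 'drukowanie')
    url = url.replace('biznes.gazetaprawna', 'www.gazetaprawna')
    print url  # -> http://www.gazetaprawna.pl/drukowanie/508501,przyklad.html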


@ -1,9 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

# Needed for BLOGs
from calibre.web.feeds import Feed

class HBR(BasicNewsRecipe):

    title = 'Harvard Business Review Blogs'
@ -32,6 +29,7 @@ class HBR(BasicNewsRecipe):
        feeds = [('Blog', 'http://feeds.harvardbusiness.org/harvardbusiness')]
        oldest_article = 30
        max_articles_per_feed = 100
        use_embedded_content = False
    else:
        timefmt = ' [%B %Y]'
@ -59,9 +57,9 @@ class HBR(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN_URL)
        br.select_form(name='signInForm')
        br['signInForm:username'] = self.username
        br['signInForm:password'] = self.password
        br.select_form(name='signin-form')
        br['signin-form:username'] = self.username
        br['signin-form:password'] = self.password
        raw = br.submit().read()
        if 'My Account' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')
@ -161,27 +159,13 @@ class HBR(BasicNewsRecipe):
        return startDate, endDate

    #-------------------------------------------------------------------------------------------------
    def hbr_parse_blogs(self, feeds):
        # Do the "official" parse_feeds first
        rssFeeds = Feed()
        # Use the PARSE_FEEDS method to get a Feeds object of the articles
        rssFeeds = BasicNewsRecipe.parse_feeds(self)
        # Create a new feed of the right configuration and append to existing feeds
        self.feed_to_index_append(rssFeeds[:], feeds)

    #-------------------------------------------------------------------------------------------------
    def parse_index(self):
        if self.INCLUDE_ARTICLES == True:
            soup = self.hbr_get_toc()
            feeds = self.hbr_parse_toc(soup)
        else:
            feeds = []

        # blog stuff
        if self.INCLUDE_BLOGS == True:
            self.hbr_parse_blogs(feeds)
        return BasicNewsRecipe.parse_index(self)
        return feeds

    #-------------------------------------------------------------------------------------------------

BIN recipes/icons/autobild.png Normal file (binary file, 614 B, not shown)
BIN (binary file, 1.1 KiB, not shown)
BIN (binary file, 1.0 KiB, not shown)


@ -16,7 +16,7 @@ class Jezebel(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/jezebel.com/img/logo.png'
    extra_css = '''
@ -32,13 +32,12 @@ class Jezebel(BasicNewsRecipe):
        , 'language' : language
    }

    remove_attributes = ['width','height']
    keep_only_tags = [dict(attrs={'class':'content permalink'})]
    remove_tags_before = dict(name='h1')
    remove_tags = [dict(attrs={'class':'contactinfo'})]
    remove_tags_after = dict(attrs={'class':'contactinfo'})
    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/vip?format=xml')]
    remove_tags = [
        {'class': 'feedflare'},
    ]
    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/full')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe

class KoreaHerald(BasicNewsRecipe):
    title = u'KoreaHerald'
    language = 'en'
    description = u'Korea Herald News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 10
    recursions = 3
    max_articles_per_feed = 10
    no_stylesheets = True

    keep_only_tags = [
        dict(id=['contentLeft', '_article'])
    ]

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
    ]

    feeds = [
        ('All News', 'http://www.koreaherald.com/rss/020000000000.xml'),
        ('National', 'http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business', 'http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style', 'http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment', 'http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports', 'http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion', 'http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe', 'http://www.koreaherald.com/rss/021000000000.xml'),
    ]


@ -16,7 +16,7 @@ class Kotaku(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    use_embedded_content = True
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
    extra_css = '''
@ -31,13 +31,12 @@ class Kotaku(BasicNewsRecipe):
        , 'language' : language
    }

    remove_attributes = ['width','height']
    keep_only_tags = [dict(attrs={'class':'content permalink'})]
    remove_tags_before = dict(name='h1')
    remove_tags = [dict(attrs={'class':'contactinfo'})]
    remove_tags_after = dict(attrs={'class':'contactinfo'})
    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/vip?format=xml')]
    remove_tags = [
        {'class': 'feedflare'},
    ]
    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/full')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -48,7 +48,7 @@ class LeMonde(BasicNewsRecipe):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
        return self.adeify_images(soup)

    preprocess_regexps = [
        (re.compile(r'([0-9])%'), lambda m: m.group(1) + '&nbsp;%'),

37 recipes/lupa.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class LupaCZ(BasicNewsRecipe):
    title = 'Lupa'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Lupa.cz'
    oldest_article = 2
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'IT,news,CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Zpr\xe1vi\u010dky', u'http://rss.lupa.cz/zpravicky'),
        (u'\u010cl\xe1nky', u'http://rss.lupa.cz/clanky')
    ]

    remove_tags_before = dict(id='main')
    remove_tags_after = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]

37 recipes/mesec.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class MesecCZ(BasicNewsRecipe):
    title = u'M\u011b\u0161ec'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Mesec.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'finance,CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', u'http://www.mesec.cz/rss/aktuality/'),
        (u'\u010cl\xe1nky', u'http://www.mesec.cz/rss/clanky/')
    ]

    remove_tags_before = dict(id='main')
    remove_tags_after = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]

43 recipes/novinky.recipe Normal file

@ -0,0 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class NovinkyCZ(BasicNewsRecipe):
    title = 'Novinky'
    __author__ = 'Tomas Latal'
    __version__ = '1.1'
    __date__ = '30 April 2011'
    description = 'News from server Novinky.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Novinky'
    category = 'news, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://img193.imageshack.us/img193/3039/novinkycover.jpg'
    extra_css = 'p.acmDescription{font-style:italic;} p.acmAuthor{font-size:0.8em; color:#707070}'

    feeds = [
        (u'Dom\xe1c\xed', u'http://www.novinky.cz/rss/domaci/'),
        (u'Zahrani\u010d\xed', u'http://www.novinky.cz/rss/zahranicni/'),
        (u'Krimi', u'http://www.novinky.cz/rss/krimi/'),
        (u'Ekonomika', u'http://www.novinky.cz/rss/ekonomika/'),
        (u'Finance', u'http://www.novinky.cz/rss/finance/'),
        (u'Kultura', u'http://www.novinky.cz/rss/kultura/'),
        (u'Koktejl', u'http://www.novinky.cz/rss/koktejl/'),
        (u'Internet a PC', u'http://www.novinky.cz/rss/internet-a-pc/'),
        (u'Auto-moto', u'http://www.novinky.cz/rss/auto/'),
    ]

    remove_tags_before = dict(id='articleContent')
    remove_tags_after = [dict(id='movedArticleAuthors')]
    remove_tags = [
        dict(name='div', attrs={'id':['articleColumnInfo','pictureInnerBox']}),
        dict(name='p', attrs={'id':['articleDate']})
    ]

100 recipes/novistandard.recipe Normal file

@ -0,0 +1,100 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.standard.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NoviStandard(BasicNewsRecipe):
    title = 'Novi Standard'
    __author__ = 'Darko Miletic'
    description = 'NoviStandard - energija je neunistiva!'
    publisher = 'Novi Standard'
    category = 'news, politics, Serbia'
    no_stylesheets = True
    delay = 1
    oldest_article = 15
    encoding = 'utf-8'
    publication_type = 'magazine'
    needs_subscription = 'optional'
    remove_empty_feeds = True
    INDEX = 'http://www.standard.rs/'
    use_embedded_content = False
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
        .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
        .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
        .contentheading{color: gray; font-size: x-large}
        .article-meta, .createdby{color: red}
        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
    """

    conversion_options = {
        'comment'    : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.select_form(name='login')
            br['username'] = self.username
            br['passwd'] = self.password
            br.submit()
        return br

    keep_only_tags = [dict(attrs={'class':['contentheading','article-meta','article-content']})]
    remove_tags_after = dict(attrs={'class':'extravote-container'})
    remove_tags = [
        dict(name=['object','link','iframe','meta','base'])
        ,dict(attrs={'class':'extravote-container'})
    ]
    remove_attributes = ['border','background','height','width','align','valign','lang']

    feeds = [
        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss')
        ,(u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss')
        ,(u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss')
        ,(u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss')
        ,(u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss')
        ,(u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss')
        ,(u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss')
        ,(u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss')
        ,(u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('div'):
            if len(item.contents) == 0:
                item.extract()
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

37 recipes/podnikatel.recipe Normal file

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class PodnikatelCZ(BasicNewsRecipe):
    title = 'Podnikatel'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z Podnikatel.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'podnikani, business, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
                 p.perex img {display:none;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', u'http://rss.podnikatel.cz/aktuality'),
        (u'\u010cl\xe1nky', u'http://rss.podnikatel.cz/clanky')
    ]

    remove_tags_before = dict(id='art-content')
    remove_tags_after = [dict(id='art-content')]
    remove_tags = [
        dict(attrs={'class':['socialshare','box-blue','author clear','labels-terms','box diskuze','ad','page-nav right','infobox','box zpravy','s-clanky']}),
        dict(id=['path','article-tools','discussionList','similarItems','promo-box'])
    ]

54 recipes/socialdiva.recipe Normal file

@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = u'2011'
'''
socialdiva.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class SocialDiva(BasicNewsRecipe):
    title = u'Social Diva'
    __author__ = u'Silviu Cotoara'
    description = u'When in doubt, wear red'
    publisher = 'Social Diva'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Femei'
    encoding = 'utf-8'
    cover_url = 'http://www.socialdiva.ro/images/logo.png'

    conversion_options = {
        'comments'  : description
        ,'tags'     : category
        ,'language' : language
        ,'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class':'col-alpha mt5 content_articol'}),
        dict(name='div', attrs={'class':'mt5'})
    ]

    remove_tags = [
        dict(name='a', attrs={'class':['comments float-left scroll mt5']}),
        dict(name='a', attrs={'class':['comments float-left scroll']}),
        dict(name='div', attrs={'class':['rating-container relative float-left']}),
        dict(name='div', attrs={'class':['float-right social_articol']})
    ]

    remove_tags_after = [
        dict(name='a', attrs={'class':['comments float-left scroll mt5']})
    ]

    feeds = [
        (u'Feeds', u'http://www.socialdiva.ro/rss.html')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe):
    recursion = 0
    no_stylesheets = True
    encoding = "utf-8"
    language = 'de_AT'
    language = 'de'
    use_embedded_content = False
    remove_empty_feeds = True


@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    __author__ = 'Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    title = u'TheMarker'
    language = 'he'
@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']})]
    max_articles_per_feed = 10
    keep_only_tags = dict(name='div', attrs={'id':'content'})
    remove_attributes = ['width','float','margin-left']
    no_stylesheets = True
    remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}),
                   dict(name='a', attrs={'href':['/misc/mobile']}),
                   dict(name='span', attrs={'class':['post-summ']})]
    max_articles_per_feed = 100
    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
    feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'),
             (u'TA Market', u'http://www.themarker.com/cmlink/1.243'),
             (u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'),
             (u'Global', u'http://www.themarker.com/cmlink/1.605658'),
             (u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'),
             (u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'),
             (u'Law', u'http://www.themarker.com/cmlink/1.605664'),
             (u'Media', u'http://www.themarker.com/cmlink/1.605660'),
             (u'Consumer', u'http://www.themarker.com/cmlink/1.605662'),
             (u'Career', u'http://www.themarker.com/cmlink/1.605665'),
             (u'Car', u'http://www.themarker.com/cmlink/1.605663'),
             (u'High Tech', u'http://www.themarker.com/cmlink/1.605659'),
             (u'Small Business', u'http://www.themarker.com/cmlink/1.605666')]

    def print_version(self, url):
        split1 = url.split("=")
        weblinks = url
        #split1 = url.split("/")
        #print_url = 'http://www.themarker.com/misc/article-print-page/' + split1[-1]
        txt = url
        if weblinks is not None:
            for link in weblinks:
                #---------------------------------------------------------
                # here we need some help with some regexpressions
                # we are trying to find it.themarker.com in a url
                #-----------------------------------------------------------
                re1 = '.*?'  # Non-greedy match on filler
                re2 = '(it\\.themarker\\.com)'  # Fully Qualified Domain Name 1
                rg = re.compile(re1 + re2, re.IGNORECASE | re.DOTALL)
                m = rg.search(url)
                re1 = '.*?'  # Non-greedy match on filler
                re2 = '(tv)'  # Word 1
                if m:
                    split2 = url.split("article/")
                    print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]
                else:
                    print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1] + '.xml'
                return print_url
        rg = re.compile(re1 + re2, re.IGNORECASE | re.DOTALL)
        m = rg.search(txt)
        if m:
            #print 'bad link'
            return 1
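The regex-based subdomain test above is fragile; a minimal sketch of the same check done with urlparse instead (the sample URL is hypothetical, and this is not part of the recipe):

    from urlparse import urlparse  # Python 2, matching the recipe's vintage

    sample_url = 'http://it.themarker.com/tmit/article/12345'  # hypothetical
    if urlparse(sample_url).hostname == 'it.themarker.com':
        print_url = 'http://it.themarker.com/tmit/PrintArticle/' + sample_url.split('article/')[1]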


@ -10,6 +10,8 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe

class Time(BasicNewsRecipe):
    recipe_disabled = ('This recipe has been disabled as TIME no longer'
                       ' publishes complete articles on the web.')
    title = u'Time'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Weekly magazine'


@ -7,13 +7,11 @@ usatoday.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
import re
class USAToday(BasicNewsRecipe):
    title = 'USA Today'
    __author__ = 'GRiker'
    __author__ = 'Kovid Goyal'
    oldest_article = 1
    timefmt = ''
    max_articles_per_feed = 20
@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe):
                margin-bottom: 0em; \
                font-size: smaller;}\n \
                .articleBody {text-align: left;}\n '
    conversion_options = { 'linearize_tables' : True }
    #simultaneous_downloads = 1
    feeds = [
        ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe):
        ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
        ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
    ]
    keep_only_tags = [dict(attrs={'class':[
                                  'byLine',
                                  'inside-copy',
                                  'inside-head',
                                  'inside-head2',
                                  'item',
                                  'item-block',
                                  'photo-container',
                                  ]}),
                      dict(id=[
                               'applyMainStoryPhoto',
                               'permalink',
                               ])]
    keep_only_tags = [dict(attrs={'class':'story'})]
    remove_tags = [
        dict(attrs={'class':[
                             'share',
                             'reprints',
                             'inline-h3',
                             'info-extras',
                             'ppy-outer',
                             'ppy-caption',
                             'comments',
                             'jump',
                             'pagetools',
                             'post-attributes',
                             'tags',
                             'bottom-tools',
                             'sponsoredlinks',
                             ]}),
        dict(id=['pluck']),
    ]
    remove_tags = [dict(attrs={'class':[
                               'comments',
                               'jump',
                               'pagetools',
                               'post-attributes',
                               'tags',
                               ]}),
                   dict(id=[])]
    #feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')]

    def dump_hex(self, src, length=16):
        ''' Diagnostic '''
        FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)])
        N = 0; result = ''
        while src:
            s, src = src[:length], src[length:]
            hexa = ' '.join(["%02X" % ord(x) for x in s])
            s = s.translate(FILTER)
            result += "%04X   %-*s   %s\n" % (N, length*3, hexa, s)
            N += length
        print result

    def fixChars(self, string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91", "&#8216;", string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92", "&#8217;", fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93", "&#8220;", fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94", "&#8221;", fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96", "&#8211;", fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97", "&#8212;", fixed)
        return fixed

    def get_masthead_url(self):
        masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe):
            masthead = None
        return masthead

    def massageNCXText(self, description):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&#38;'
            massaged = re.sub("&", "&#38;", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def parse_feeds(self, *args, **kwargs):
        parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)
        # Count articles for progress dialog
        article_count = 0
        for feed in parsed_feeds:
            article_count += len(feed)
        self.log("Queued %d articles" % article_count)
        return parsed_feeds

    def preprocess_html(self, soup):
        soup = self.strip_anchors(soup)
        return soup

    def postprocess_html(self, soup, first_fetch):
        # Remove navLinks <div class="inside-copy" style="padding-bottom:3px">
        navLinks = soup.find(True, {'style':'padding-bottom:3px'})
        if navLinks:
            navLinks.extract()

        # Remove <div class="inside-copy" style="margin-bottom:10px">
        gibberish = soup.find(True, {'style':'margin-bottom:10px'})
        if gibberish:
            gibberish.extract()

        # Change <inside-head> to <h2>
        headline = soup.find(True, {'class':['inside-head','inside-head2']})
        if not headline:
            headline = soup.find('h3')
        if headline:
            tag = Tag(soup, "h2")
            tag['class'] = "headline"
            tag.insert(0, headline.contents[0])
            headline.replaceWith(tag)
        else:
            print "unable to find headline:\n%s\n" % soup

        # Change byLine to byline, change commas to middot
        # Kindle renders commas in byline as '&'
        byline = soup.find(True, {'class':'byLine'})
        if byline:
            byline['class'] = 'byline'
            # Replace comma with middot
            byline.contents[0].replaceWith(re.sub(",", " &middot;", byline.renderContents()))

        jumpout_punc_list = [':','?']
        # Remove the inline jumpouts in <div class="inside-copy">
        paras = soup.findAll(True, {'class':'inside-copy'})
        for para in paras:
            if re.match("<b>[\w\W]+ ", para.renderContents()):
                p = para.find('b')
                for punc in jumpout_punc_list:
                    punc_offset = p.contents[0].find(punc)
                    if punc_offset == -1:
                        continue
                    if punc_offset > 1:
                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
                            #print "extracting \n%s\n" % para.prettify()
                            para.extract()

        # Reset class for remaining
        paras = soup.findAll(True, {'class':'inside-copy'})
        for para in paras:
            para['class'] = 'articleBody'

        # Remove inline jumpouts in <p>
        paras = soup.findAll(['p'])
        for p in paras:
            if hasattr(p, 'contents') and len(p.contents):
                for punc in jumpout_punc_list:
                    punc_offset = p.contents[0].find(punc)
                    if punc_offset == -1:
                        continue
                    if punc_offset > 2 and hasattr(p, 'a') and len(p.contents):
                        #print "evaluating %s\n" % p.contents[0][:punc_offset+1]
                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
                            #print "extracting \n%s\n" % p.prettify()
                            p.extract()

        # Capture the first img, insert after headline
        imgs = soup.findAll('img')
        print "postprocess_html(): %d images" % len(imgs)
        if imgs:
            divTag = Tag(soup, 'div')
            divTag['class'] = 'image'
            body = soup.find('body')
            img = imgs[0]
            #print "img: \n%s\n" % img.prettify()

            # Table for photo and credit
            tableTag = Tag(soup, 'table')

            # Photo
            trimgTag = Tag(soup, 'tr')
            tdimgTag = Tag(soup, 'td')
            tdimgTag.insert(0, img)
            trimgTag.insert(0, tdimgTag)
            tableTag.insert(0, trimgTag)

            # Credit
            trcreditTag = Tag(soup, 'tr')
            tdcreditTag = Tag(soup, 'td')
            tdcreditTag['class'] = 'credit'
            credit = soup.find('td', {'class':'photoCredit'})
            if credit:
                tdcreditTag.insert(0, NavigableString(credit.renderContents()))
            else:
                credit = img['credit']
                if credit:
                    tdcreditTag.insert(0, NavigableString(credit))
                else:
                    tdcreditTag.insert(0, NavigableString(''))
            trcreditTag.insert(0, tdcreditTag)
            tableTag.insert(1, trcreditTag)
            dtc = 0
            divTag.insert(dtc, tableTag)
            dtc += 1

            if False:
                # Add the caption in the table
                tableCaptionTag = Tag(soup, 'caption')
                tableCaptionTag.insert(0, soup.find('td', {'class':'photoCredit'}).renderContents())
                tableTag.insert(1, tableCaptionTag)
                divTag.insert(dtc, tableTag)
                dtc += 1
                body.insert(1, divTag)
            else:
                # Add the caption below the table
                #print "Looking for caption in this soup:\n%s" % img.prettify()
                captionTag = Tag(soup, 'p')
                captionTag['class'] = 'caption'
                if hasattr(img, 'alt') and img['alt']:
                    captionTag.insert(0, NavigableString('<blockquote>%s</blockquote>' % img['alt']))
                    divTag.insert(dtc, captionTag)
                    dtc += 1
                else:
                    try:
                        captionTag.insert(0, NavigableString('<blockquote>%s</blockquote>' % img['cutline']))
                        divTag.insert(dtc, captionTag)
                        dtc += 1
                    except:
                        pass

                hrTag = Tag(soup, 'hr')
                divTag.insert(dtc, hrTag)
                dtc += 1

            # Delete <div id="applyMainStoryPhoto"
            photoJunk = soup.find('div', {'id':'applyMainStoryPhoto'})
            if photoJunk:
                photoJunk.extract()

            # Insert img after headline
            tag = body.find(True)
            insertLoc = 0
            headline_found = False
            while True:
                # Scan the top-level tags
                insertLoc += 1
                if hasattr(tag, 'class') and tag['class'] == 'headline':
                    headline_found = True
                    body.insert(insertLoc, divTag)
                    break
                tag = tag.nextSibling
                if not tag:
                    break

            if not headline_found:
                # Monolithic <div> - restructure
                tag = body.find(True)
                while True:
                    insertLoc += 1
                    try:
                        if hasattr(tag, 'class') and tag['class'] == 'headline':
                            headline_found = True
                            tag.insert(insertLoc, divTag)
                            break
                    except:
                        pass
                    tag = tag.next
                    if not tag:
                        break

            # Yank out headline, img and caption
            headline = body.find('h2', 'headline')
            img = body.find('div', 'image')
            caption = body.find('p''class')

            # body(0) is calibre_navbar
            # body(1) is <div class="item">
            btc = 1
            headline.extract()
            body.insert(1, headline)
            btc += 1
            if img:
                img.extract()
                body.insert(btc, img)
                btc += 1
            if caption:
                caption.extract()
                body.insert(btc, caption)
                btc += 1

            if len(imgs) > 1:
                if True:
                    [img.extract() for img in imgs[1:]]
                else:
                    # Format the remaining images
                    # This doesn't work yet
                    for img in imgs[1:]:
                        print "img:\n%s\n" % img.prettify()
                        divTag = Tag(soup, 'div')
                        divTag['class'] = 'image'

                        # Table for photo and credit
                        tableTag = Tag(soup, 'table')

                        # Photo
                        trimgTag = Tag(soup, 'tr')
                        tdimgTag = Tag(soup, 'td')
                        tdimgTag.insert(0, img)
                        trimgTag.insert(0, tdimgTag)
                        tableTag.insert(0, trimgTag)

                        # Credit
                        trcreditTag = Tag(soup, 'tr')
                        tdcreditTag = Tag(soup, 'td')
                        tdcreditTag['class'] = 'credit'
                        try:
                            tdcreditTag.insert(0, NavigableString(img['credit']))
                        except:
                            tdcreditTag.insert(0, NavigableString(''))
                        trcreditTag.insert(0, tdcreditTag)
                        tableTag.insert(1, trcreditTag)
                        divTag.insert(0, tableTag)
                        soup.img.replaceWith(divTag)

        return soup

    def postprocess_book(self, oeb, opts, log):

        def extract_byline(href):
            # <meta name="byline" content=
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            byline = soup.find('div', attrs={'class':'byline'})
            if byline:
                byline['class'] = 'byline'
                # Replace comma with middot
                byline.contents[0].replaceWith(re.sub(u",", u" &middot;",
                    byline.renderContents(encoding=None)))
                return byline.renderContents(encoding=None)
            else:
                paras = soup.findAll(text=True)
                for para in paras:
                    if para.startswith("Copyright"):
                        return para[len('Copyright xxxx '):para.find('.')]
                return None

        def extract_description(href):
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            description = soup.find('meta', attrs={'name':'description'})
            if description:
                return self.massageNCXText(description['content'])
            else:
                # Take first paragraph of article
                articleBody = soup.find('div', attrs={'id':['articleBody','item']})
                if articleBody:
                    paras = articleBody.findAll('p')
                    for p in paras:
                        if p.renderContents() > '':
                            return self.massageNCXText(self.tag_to_string(p, use_alt=False))
                else:
                    print "Didn't find <div id='articleBody'> in this soup:\n%s" % soup.prettify()
                    return None

        # Method entry point here
        # Single section toc looks different than multi-section tocs
        if oeb.toc.depth() == 2:
            for article in oeb.toc:
                if article.author is None:
                    article.author = extract_byline(article.href)
                if article.description is None:
                    article.description = extract_description(article.href)
        elif oeb.toc.depth() == 3:
            for section in oeb.toc:
                for article in section:
                    article.author = extract_byline(article.href)
                    '''
                    if article.author is None:
                        article.author = self.massageNCXText(extract_byline(article.href))
                    else:
                        article.author = self.massageNCXText(article.author)
                    '''
                    if article.description is None:
                        article.description = extract_description(article.href)

    def strip_anchors(self, soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

39 recipes/vitalia.recipe Normal file

@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal <latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class VitaliaCZ(BasicNewsRecipe):
    title = 'Vitalia'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z Vitalia.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'zdravi, vztahy, wellness, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0; line-height: 1.4; padding: 0 0 10px 0; font-weight: bold;} \
                 p.perex img {display:none;} \
                 span.author {font-size:0.8em; font-style:italic} \
                 .urs div.rs-tip-major {padding:0.5em; background: #e0e0e0 none repeat scroll 0 0;border: 1px solid #909090;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', 'http://www.vitalia.cz/rss/aktuality/'),
        (u'\u010cl\xe1nky', u'http://www.vitalia.cz/rss/clanky/'),
    ]

    remove_tags_before = dict(id='main')
    remove_tags_after = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]


@ -0,0 +1,115 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Volkskrant_full(BasicNewsRecipe):
    # This recipe will download the Volkskrant newspaper,
    # from the subscribers site. It requires a password.
    # Known issues are: articles that are spread out over
    # multiple pages will appear multiple times. Pages
    # that contain only adverts will appear, but empty.
    # The supplement 'Volkskrant Magazine' on saturday
    # is currently not downloaded.
    # You can set a manual date, to download an archived
    # newspaper. Volkskrant stores over a month at the
    # moment of writing. To do so I suggest you unmark
    # the date on the line below, and insert it in the title. Then
    # follow the instructions marked further below.

    title = 'De Volkskrant (subscription)'  # [za, 13 nov 2010]'
    __author__ = u'Selcal'
    description = u"Volkskrant"
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'nl'
    use_embedded_content = False
    simultaneous_downloads = 1
    delay = 1
    needs_subscription = True
    # Set RETRIEVEDATE to 'yyyymmdd' to load an older
    # edition. Otherwise keep '%Y%m%d'
    # When setting a manual date, unmark and add the date
    # to the title above, and unmark the timefmt line to stop
    # Calibre from adding today's date in addition.
    # timefmt = ''
    RETRIEVEDATE = strftime('%Y%m%d')
    INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text'
    INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/'
    LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do'
    remove_tags = [dict(name='address')]
    cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        krant = []

        def strip_title(_title):
            i = 0
            while ((_title[i] <> ":") and (i <= len(_title))):
                i = i + 1
            return (_title[0:i])

        for temp in range(5):
            try:
                soup = self.index_to_soup(self.INDEX_MAIN)
                break
            except:
                #print '(Retrying main index load)'
                continue
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
            articles = []
            _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
            _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
            #print ''
            #print '<------- Processing section: ' + _INDEX + ' ------------------------->'
            for temp in range(5):
                try:
                    soup = self.index_to_soup(_INDEX)
                    break
                except:
                    #print '(Retrying index load)'
                    continue
            for item in soup.findAll('area'):
                art_nr = item['class']
                attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
                #print '==> Found: ' + attrname;
                index_title = soup.find('div', attrs={'class': attrname})
                get_title = index_title['title'];
                _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
                title = get_title;
                #print '--> Title: ' + title;
                #print '--> URL: ' + _ARTICLE;
                for temp in range(5):
                    try:
                        souparticle = self.index_to_soup(_ARTICLE);
                        break
                    except:
                        print '(Retrying URL load)'
                        continue
                headerurl = souparticle.findAll('frame')[0]['src'];
                #print '--> Read frame name for header: ' + headerurl;
                url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html';
                #print '--> Corrected URL: ' + url;
                if (get_title <> ''):
                    title = strip_title(get_title)
                    date = strftime(' %B %Y')
                if (title <> ''):
                    articles.append({
                        'title'       : title,
                        'date'        : date,
                        'url'         : url,
                        'description' : ''
                    })
            krant.append((option.string, articles))
        return krant
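A minimal sketch of the manual-date setup the comments above describe; the date value is hypothetical:

    title = 'De Volkskrant (subscription) [za, 13 nov 2010]'
    timefmt = ''                # stop calibre appending today's date as well
    RETRIEVEDATE = '20101113'   # archived edition, yyyymmdd, instead of strftime('%Y%m%d')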


@ -118,6 +118,7 @@ sort_columns_at_startup = None
# timestamp default if not set: dd MMM yyyy
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
gui_last_modified_display_format = 'dd MMM yyyy'
#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
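The new tweak takes the same Qt-style date format strings as the two above it; a sketch of overriding it in a personal tweaks file (the format shown is an example, not the shipped default):

    gui_last_modified_display_format = 'yyyy-MM-dd'  # e.g. 2011-05-08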


@ -7,17 +7,30 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
sort TEXT COLLATE NOCASE,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
uri TEXT,
series_index INTEGER NOT NULL DEFAULT 1,
pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
series_index REAL NOT NULL DEFAULT 1.0,
author_sort TEXT COLLATE NOCASE,
isbn TEXT DEFAULT "" COLLATE NOCASE,
path TEXT NOT NULL DEFAULT ""
);
lccn TEXT DEFAULT "" COLLATE NOCASE,
path TEXT NOT NULL DEFAULT "",
flags INTEGER NOT NULL DEFAULT 1
, uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
author INTEGER NOT NULL,
UNIQUE(book, author)
);
CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
lang_code INTEGER NOT NULL,
item_order INTEGER NOT NULL DEFAULT 0,
UNIQUE(book, lang_code)
);
CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
name TEXT NON NULL,
val TEXT NON NULL,
UNIQUE(book,name));
CREATE TABLE books_publishers_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
publisher INTEGER NOT NULL,
@ -49,11 +62,51 @@ CREATE TABLE conversion_options ( id INTEGER PRIMARY KEY,
data BLOB NOT NULL,
UNIQUE(format,book)
);
CREATE TABLE custom_columns (
id INTEGER PRIMARY KEY AUTOINCREMENT,
label TEXT NOT NULL,
name TEXT NOT NULL,
datatype TEXT NOT NULL,
mark_for_delete BOOL DEFAULT 0 NOT NULL,
editable BOOL DEFAULT 1 NOT NULL,
display TEXT DEFAULT "{}" NOT NULL,
is_multiple BOOL DEFAULT 0 NOT NULL,
normalized BOOL NOT NULL,
UNIQUE(label)
);
CREATE TABLE data ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
format TEXT NON NULL COLLATE NOCASE,
uncompressed_size INTEGER NON NULL,
name TEXT NON NULL,
UNIQUE(book, format)
);
CREATE TABLE feeds ( id INTEGER PRIMARY KEY,
title TEXT NOT NULL,
script TEXT NOT NULL,
UNIQUE(title)
);
CREATE TABLE identifiers ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
val TEXT NON NULL COLLATE NOCASE,
UNIQUE(book, type)
);
CREATE TABLE languages ( id INTEGER PRIMARY KEY,
lang_code TEXT NON NULL COLLATE NOCASE,
UNIQUE(lang_code)
);
CREATE TABLE library_id ( id INTEGER PRIMARY KEY,
uuid TEXT NOT NULL,
UNIQUE(uuid)
);
CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
UNIQUE(book));
CREATE TABLE preferences(id INTEGER PRIMARY KEY,
key TEXT NON NULL,
val TEXT NON NULL,
UNIQUE(key));
CREATE TABLE publishers ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE,
sort TEXT COLLATE NOCASE,
@ -72,34 +125,143 @@ CREATE TABLE tags ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE,
UNIQUE (name)
);
CREATE TABLE data ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
format TEXT NON NULL COLLATE NOCASE,
uncompressed_size INTEGER NON NULL,
name TEXT NON NULL,
UNIQUE(book, format)
);
CREATE VIEW meta AS
SELECT id, title,
(SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn
FROM books;
SELECT id, title,
(SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn,
path,
lccn,
pubdate,
flags,
uuid
FROM books;
CREATE VIEW tag_browser_authors AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) count,
(SELECT AVG(ratings.rating)
FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.author=authors.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
sort AS sort
FROM authors;
CREATE VIEW tag_browser_filtered_authors AS SELECT
id,
name,
(SELECT COUNT(books_authors_link.id) FROM books_authors_link WHERE
author=authors.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.author=authors.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
sort AS sort
FROM authors;
CREATE VIEW tag_browser_filtered_publishers AS SELECT
id,
name,
(SELECT COUNT(books_publishers_link.id) FROM books_publishers_link WHERE
publisher=publishers.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
name AS sort
FROM publishers;
CREATE VIEW tag_browser_filtered_ratings AS SELECT
id,
rating,
(SELECT COUNT(books_ratings_link.id) FROM books_ratings_link WHERE
rating=ratings.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.rating=ratings.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
rating AS sort
FROM ratings;
CREATE VIEW tag_browser_filtered_series AS SELECT
id,
name,
(SELECT COUNT(books_series_link.id) FROM books_series_link WHERE
series=series.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_series_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.series=series.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
(title_sort(name)) AS sort
FROM series;
CREATE VIEW tag_browser_filtered_tags AS SELECT
id,
name,
(SELECT COUNT(books_tags_link.id) FROM books_tags_link WHERE
tag=tags.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.tag=tags.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
name AS sort
FROM tags;
CREATE VIEW tag_browser_publishers AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) count,
(SELECT AVG(ratings.rating)
FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
name AS sort
FROM publishers;
CREATE VIEW tag_browser_ratings AS SELECT
id,
rating,
(SELECT COUNT(id) FROM books_ratings_link WHERE rating=ratings.id) count,
(SELECT AVG(ratings.rating)
FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.rating=ratings.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
rating AS sort
FROM ratings;
CREATE VIEW tag_browser_series AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_series_link WHERE series=series.id) count,
(SELECT AVG(ratings.rating)
FROM books_series_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.series=series.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
(title_sort(name)) AS sort
FROM series;
CREATE VIEW tag_browser_tags AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) count,
(SELECT AVG(ratings.rating)
FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.tag=tags.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
name AS sort
FROM tags;
CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
CREATE INDEX books_authors_link_aidx ON books_authors_link (author);
CREATE INDEX books_authors_link_bidx ON books_authors_link (book);
CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
CREATE INDEX books_publishers_link_aidx ON books_publishers_link (publisher);
CREATE INDEX books_publishers_link_bidx ON books_publishers_link (book);
CREATE INDEX books_ratings_link_aidx ON books_ratings_link (rating);
@ -111,32 +273,38 @@ CREATE INDEX books_tags_link_bidx ON books_tags_link (book);
CREATE INDEX comments_idx ON comments (book);
CREATE INDEX conversion_options_idx_a ON conversion_options (format COLLATE NOCASE);
CREATE INDEX conversion_options_idx_b ON conversion_options (book);
CREATE INDEX custom_columns_idx ON custom_columns (label);
CREATE INDEX data_idx ON data (book);
CREATE INDEX formats_idx ON data (format);
CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);
CREATE INDEX publishers_idx ON publishers (name COLLATE NOCASE);
CREATE INDEX series_idx ON series (sort COLLATE NOCASE);
CREATE INDEX series_idx ON series (name COLLATE NOCASE);
CREATE INDEX tags_idx ON tags (name COLLATE NOCASE);
CREATE TRIGGER books_delete_trg
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM books_languages_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
DELETE FROM books_plugin_data WHERE book=OLD.id;
DELETE FROM identifiers WHERE book=OLD.id;
END;
CREATE TRIGGER books_insert_trg
AFTER INSERT ON books
CREATE TRIGGER books_insert_trg AFTER INSERT ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
END;
CREATE TRIGGER books_update_trg
AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
END;
AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title)
WHERE id=NEW.id AND OLD.title <> NEW.title;
END;
CREATE TRIGGER fkc_comments_insert
BEFORE INSERT ON comments
BEGIN
@ -169,23 +337,41 @@ CREATE TRIGGER fkc_data_update
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
END;
END;
CREATE TRIGGER fkc_delete_books_authors_link
CREATE TRIGGER fkc_delete_on_authors
BEFORE DELETE ON authors
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_authors_link WHERE book=OLD.book) > 0
THEN RAISE(ABORT, 'Foreign key violation: author is still referenced')
WHEN (SELECT COUNT(id) FROM books_authors_link WHERE author=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: authors is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_books_publishers_link
CREATE TRIGGER fkc_delete_on_languages
BEFORE DELETE ON languages
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_on_languages_link
BEFORE INSERT ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
END;
END;
CREATE TRIGGER fkc_delete_on_publishers
BEFORE DELETE ON publishers
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE book=OLD.book) > 0
THEN RAISE(ABORT, 'Foreign key violation: publisher is still referenced')
WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: publishers is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_books_series_link
CREATE TRIGGER fkc_delete_on_series
BEFORE DELETE ON series
BEGIN
SELECT CASE
@ -193,12 +379,12 @@ CREATE TRIGGER fkc_delete_books_series_link
THEN RAISE(ABORT, 'Foreign key violation: series is still referenced')
END;
END;
CREATE TRIGGER fkc_delete_books_tags_link
CREATE TRIGGER fkc_delete_on_tags
BEFORE DELETE ON tags
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: tag is still referenced')
THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced')
END;
END;
CREATE TRIGGER fkc_insert_books_authors_link
@ -267,6 +453,22 @@ CREATE TRIGGER fkc_update_books_authors_link_b
THEN RAISE(ABORT, 'Foreign key violation: author not in authors')
END;
END;
CREATE TRIGGER fkc_update_books_languages_link_a
BEFORE UPDATE OF book ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
END;
END;
CREATE TRIGGER fkc_update_books_languages_link_b
BEFORE UPDATE OF lang_code ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
END;
END;
CREATE TRIGGER fkc_update_books_publishers_link_a
BEFORE UPDATE OF book ON books_publishers_link
BEGIN
@ -341,3 +543,4 @@ CREATE TRIGGER series_update_trg
BEGIN
UPDATE series SET sort=NEW.name WHERE id=NEW.id;
END;
pragma user_version=20;
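
The link tables above are plain SQLite, so the schema can be spot-checked with nothing but the standard library. A minimal sketch, assuming a metadata.db built from this script; it avoids the meta and tag_browser views, which call functions such as title_sort() and sortconcat() that calibre registers on its own connections:

import sqlite3

conn = sqlite3.connect('metadata.db')
# Plain tables need none of calibre's registered SQL functions.
for book_id, title in conn.execute('SELECT id, title FROM books LIMIT 5'):
    langs = [code for (code,) in conn.execute(
        'SELECT l.lang_code FROM languages l '
        'JOIN books_languages_link bl ON bl.lang_code = l.id '
        'WHERE bl.book = ?', (book_id,))]
    print book_id, title, langs
conn.close()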


@ -2,6 +2,11 @@ a {
text-decoration: none;
color: blue
}
a:hover {
color: red
}
.comments {
margin-top: 0;
padding-top: 0;


@ -23,6 +23,9 @@ wWinMain(HINSTANCE Inst, HINSTANCE PrevInst,
ret = execute_python_entrypoint(BASENAME, MODULE, FUNCTION,
stdout_redirect, stderr_redirect);
if (stdout != NULL) fclose(stdout);
if (stderr != NULL) fclose(stderr);
DeleteFile(stdout_redirect);
DeleteFile(stderr_redirect);


@ -69,7 +69,24 @@ nmake -f ms\ntdll.mak install
Qt
--------
Extract Qt source code to C:\Qt\4.x.x. Run configure and make::
Extract Qt source code to C:\Qt\4.x.x.
Qt uses its own routine to locate and load "system libraries" including the openssl libraries needed for "Get Books". This means that we have to apply the following patch to have Qt load the openssl libraries bundled with calibre:
--- src/corelib/plugin/qsystemlibrary.cpp 2011-02-22 05:04:00.000000000 -0700
+++ src/corelib/plugin/qsystemlibrary.cpp 2011-04-25 20:53:13.635247466 -0600
@@ -110,7 +110,7 @@ HINSTANCE QSystemLibrary::load(const wch
#if !defined(QT_BOOTSTRAPPED)
if (!onlySystemDirectory)
- searchOrder << QFileInfo(qAppFileName()).path();
+ searchOrder << (QFileInfo(qAppFileName()).path().replace(QLatin1Char('/'), QLatin1Char('\\')) + QString::fromLatin1("\\DLLs\\"));
#endif
searchOrder << qSystemDirectory();
Now, run configure and make::
configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake


@ -11,7 +11,10 @@
SummaryCodepage='1252' />
<Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
<!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
after you update Qt beyond 4.7.2. 'emus' means re-install even if the version is the same, not just if it is older. -->
<Property Id='REINSTALLMODE' Value='emus'/>
<Upgrade Id="{upgrade_code}">
<UpgradeVersion Maximum="{version}"
IncludeMaximum="yes"


@ -347,9 +347,10 @@ class UploadUserManual(Command): # {{{
with NamedTemporaryFile(suffix='.zip') as f:
os.fchmod(f.fileno(),
stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
with CurrentDir(self.d(path)):
with CurrentDir(path):
with ZipFile(f, 'w') as zf:
for x in os.listdir('.'):
if x.endswith('.swp'): continue
zf.write(x)
if os.path.isdir(x):
for y in os.listdir(x):


@ -388,7 +388,11 @@ class CurrentDir(object):
return self.cwd
def __exit__(self, *args):
os.chdir(self.cwd)
try:
os.chdir(self.cwd)
except:
# The previous CWD no longer exists
pass
class StreamReadWrapper(object):


@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 7, 57)
numeric_version = (0, 8, 0)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"


@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
['author_sort','authors','comments','cover','formats',
'id','isbn','ondevice','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
'title_sort','title','uuid'])
all_custom_fields = set(db.custom_field_keys())
all_fields = all_std_fields.union(all_custom_fields)
@ -607,6 +607,7 @@ class StoreBase(Plugin): # {{{
supported_platforms = ['windows', 'osx', 'linux']
author = 'John Schember'
type = _('Store')
minimum_calibre_version = (0, 8, 0)
actual_plugin = None


@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.utils.config import test_eight_code
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
@ -596,6 +595,7 @@ from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
from calibre.devices.prs505.driver import PRS505
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52
@ -613,6 +613,7 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
@ -621,29 +622,16 @@ from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, ]
if test_eight_code:
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
# }}}
else:
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
AmazonCovers, DoubanCovers
plugins += [GoogleBooks, ISBNDB, Amazon,
OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
plugins += [
ComicInput,
@ -756,6 +744,9 @@ plugins += [
EEEREADER,
NEXTBOOK,
ITUNES,
BOEYE_BEX,
BOEYE_BDX,
USER_DEFINED,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
@ -868,10 +859,7 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionRestart, ActionOpenFolder, ActionConnectShare,
ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch]
if test_eight_code:
plugins += [ActionStore]
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]
# }}}
@ -1097,10 +1085,8 @@ class Misc(PreferencesPlugin):
plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard,
Email, Server, Plugins, Tweaks, Misc, TemplateFunctions]
if test_eight_code:
plugins.append(MetadataSources)
Email, Server, Plugins, Tweaks, Misc, TemplateFunctions,
MetadataSources]
#}}}
@ -1110,6 +1096,11 @@ class StoreAmazonKindleStore(StoreBase):
description = _('Kindle books from Amazon')
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
description = _('Kindle books from Amazon.uk')
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
description = _('Ebooks for readers.')
@ -1175,10 +1166,27 @@ class StoreSmashwordsStore(StoreBase):
description = _('Your ebook. Your way.')
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
plugins += [StoreAmazonKindleStore, StoreBaenWebScriptionStore, StoreBNStore,
class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK'
description = _('Feel every word')
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
description = _('Foyles of London, online')
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
class AmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
description = _('Kindle eBooks')
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore, StoreBNStore,
StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
            StoreEHarlequinStore,
StoreFeedbooksStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore]
            StoreEHarlequinStore, StoreFeedbooksStore,
StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
StoreWaterstonesUKStore]
# }}}


@ -15,12 +15,11 @@ from calibre.customize.profiles import InputProfile, OutputProfile
from calibre.customize.builtins import plugins as builtin_plugins
from calibre.devices.interface import DevicePlugin
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.covers import CoverDownload
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser, prefs
from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
plugin_dir, OptionParser)
from calibre.ebooks.epub.fix import ePubFixer
from calibre.ebooks.metadata.sources.base import Source
from calibre.constants import DEBUG
builtin_names = frozenset([p.name for p in builtin_plugins])
@ -93,8 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
config['enabled_plugins'] = ep
default_disabled_plugins = set([
'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
'Kent District Library'
'Overdrive',
])
def is_disabled(plugin):
@ -190,44 +188,6 @@ def output_profiles():
yield plugin
# }}}
# Metadata sources {{{
def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
for plugin in _initialized_plugins:
if isinstance(plugin, MetadataSource) and \
plugin.metadata_type == metadata_type:
if is_disabled(plugin):
continue
if customize:
customization = config['plugin_customization']
plugin.site_customization = customization.get(plugin.name, None)
if plugin.name == 'IsbnDB' and isbndb_key is not None:
plugin.site_customization = isbndb_key
yield plugin
def get_isbndb_key():
return config['plugin_customization'].get('IsbnDB', None)
def set_isbndb_key(key):
for plugin in _initialized_plugins:
if plugin.name == 'IsbnDB':
return customize_plugin(plugin, key)
def migrate_isbndb_key():
key = prefs['isbndb_com_key']
if key:
prefs.set('isbndb_com_key', '')
set_isbndb_key(key)
def cover_sources():
customization = config['plugin_customization']
for plugin in _initialized_plugins:
if isinstance(plugin, CoverDownload):
if not is_disabled(plugin):
plugin.site_customization = customization.get(plugin.name, '')
yield plugin
# }}}
# Interface Actions # {{{
def interface_actions():
@ -527,8 +487,9 @@ def initialize_plugins():
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
_initialized_plugins.append(plugin)
except:
print 'Failed to initialize plugin...'
traceback.print_exc()
print 'Failed to initialize plugin:', repr(zfp)
if DEBUG:
traceback.print_exc()
_initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
reread_filetype_plugins()
reread_metadata_plugins()


@ -156,3 +156,60 @@ def debug(ioreg_to_tmp=False, buf=None):
sys.stdout = oldo
sys.stderr = olde
def device_info(ioreg_to_tmp=False, buf=None):
from calibre.devices.scanner import DeviceScanner, win_pnp_drives
from calibre.constants import iswindows
import re
res = {}
device_details = {}
device_set = set()
drive_details = {}
drive_set = set()
res['device_set'] = device_set
res['device_details'] = device_details
res['drive_details'] = drive_details
res['drive_set'] = drive_set
try:
s = DeviceScanner()
s.scan()
devices = (s.devices)
if not iswindows:
devices = [list(x) for x in devices]
for dev in devices:
for i in range(3):
dev[i] = hex(dev[i])
d = dev[0] + dev[1] + dev[2]
device_set.add(d)
device_details[d] = dev[0:3]
else:
for dev in devices:
vid = re.search('vid_([0-9a-f]*)&', dev)
if vid:
vid = vid.group(1)
pid = re.search('pid_([0-9a-f]*)&', dev)
if pid:
pid = pid.group(1)
rev = re.search('rev_([0-9a-f]*)$', dev)
if rev:
rev = rev.group(1)
d = vid+pid+rev
device_set.add(d)
device_details[d] = (vid, pid, rev)
drives = win_pnp_drives(debug=False)
for drive,details in drives.iteritems():
order = 'ORD_' + str(drive.order)
ven = re.search('VEN_([^&]*)&', details)
if ven:
ven = ven.group(1)
prod = re.search('PROD_([^&]*)&', details)
if prod:
prod = prod.group(1)
d = (order, ven, prod)
drive_details[drive] = d
drive_set.add(drive)
finally:
pass
return res
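
The Windows branch of device_info() above recovers USB IDs with three small regular expressions; a sketch of what they extract, using a made-up PnP id string:

import re

dev = 'usb\\vid_2237&pid_2208&rev_0100'  # hypothetical device string
vid = re.search('vid_([0-9a-f]*)&', dev).group(1)  # -> '2237'
pid = re.search('pid_([0-9a-f]*)&', dev).group(1)  # -> '2208'
rev = re.search('rev_([0-9a-f]*)$', dev).group(1)  # -> '0100'
print vid, pid, rev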


@ -62,7 +62,7 @@ class ANDROID(USBMS):
0x502 : { 0x3203 : [0x0100]},
# Dell
0x413c : { 0xb007 : [0x0100, 0x0224]},
0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},
# LG
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
@ -109,10 +109,10 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE']
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']
OSX_MAIN_MEM = 'Android Device Main Memory'


@ -163,6 +163,8 @@ class ITUNES(DriverBase):
settings()
set_progress_reporter()
upload_books()
_get_fpath()
_update_epub_metadata()
add_books_to_metadata()
use_plugboard_ext()
set_plugboard()
@ -460,7 +462,7 @@ class ITUNES(DriverBase):
cached_books[this_book.path] = {
'title':book.Name,
'author':book.artist().split(' & '),
'author':book.Artist.split(' & '),
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
'uuid': book.Composer,
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
@ -504,7 +506,7 @@ class ITUNES(DriverBase):
if self.iTunes:
# Check for connected book-capable device
self.sources = self._get_sources()
if 'iPod' in self.sources:
if 'iPod' in self.sources and not self.ejected:
#if DEBUG:
#sys.stdout.write('.')
#sys.stdout.flush()
@ -2034,16 +2036,17 @@ class ITUNES(DriverBase):
if 'iPod' in self.sources:
connected_device = self.sources['iPod']
device = self.iTunes.sources[connected_device]
dev_books = None
for pl in device.playlists():
if pl.special_kind() == appscript.k.Books:
if DEBUG:
self.log.info(" Book playlist: '%s'" % (pl.name()))
books = pl.file_tracks()
dev_books = pl.file_tracks()
break
else:
self.log.error(" book_playlist not found")
for book in books:
for book in dev_books:
# This may need additional entries for international iTunes users
if book.kind() in self.Audiobooks:
if DEBUG:
@ -2621,42 +2624,42 @@ class ITUNES(DriverBase):
# Touch the OPF timestamp
try:
zf_opf = ZipFile(fpath,'r')
fnames = zf_opf.namelist()
opf = [x for x in fnames if '.opf' in x][0]
except:
raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
None,
level=UserFeedback.WARN)
fnames = zf_opf.namelist()
opf = [x for x in fnames if '.opf' in x][0]
if opf:
opf_tree = etree.fromstring(zf_opf.read(opf))
md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
if md_els:
ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
if ts is not None:
timestamp = ts.get('content')
old_ts = parse_date(timestamp)
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
if DEBUG:
self.log.info(" existing timestamp: %s" % metadata.timestamp)
else:
metadata.timestamp = now()
if DEBUG:
self.log.info(" add timestamp: %s" % metadata.timestamp)
opf_tree = etree.fromstring(zf_opf.read(opf))
md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
if md_els:
ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
if ts is not None:
timestamp = ts.get('content')
old_ts = parse_date(timestamp)
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
if DEBUG:
self.log.info(" existing timestamp: %s" % metadata.timestamp)
else:
metadata.timestamp = now()
if DEBUG:
self.log.warning(" missing <metadata> block in OPF file")
self.log.info(" add timestamp: %s" % metadata.timestamp)
# Force the language declaration for iBooks 1.1
#metadata.language = get_lang().replace('_', '-')
# Updates from metadata plugboard (ignoring publisher)
metadata.language = metadata_x.language
else:
metadata.timestamp = now()
if DEBUG:
if metadata.language != metadata_x.language:
self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)
self.log.warning(" missing <metadata> block in OPF file")
self.log.info(" add timestamp: %s" % metadata.timestamp)
# Force the language declaration for iBooks 1.1
#metadata.language = get_lang().replace('_', '-')
# Updates from metadata plugboard (ignoring publisher)
metadata.language = metadata_x.language
if DEBUG:
if metadata.language != metadata_x.language:
self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)
zf_opf.close()


@ -0,0 +1,56 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Ken <ken at szboeye.com>'
__docformat__ = 'restructuredtext en'
'''
Device driver for BOEYE series readers
'''
from calibre.devices.usbms.driver import USBMS
class BOEYE_BEX(USBMS):
name = 'BOEYE BEX reader driver'
gui_name = 'BOEYE BEX'
    description = _('Communicate with BOEYE BEX series eBook readers.')
author = 'szboeye'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
VENDOR_ID = [0x0085]
PRODUCT_ID = [0x600]
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BEX Storage Card'
EBOOK_DIR_MAIN = 'Documents'
SUPPORTS_SUB_DIRS = True
class BOEYE_BDX(USBMS):
name = 'BOEYE BDX reader driver'
gui_name = 'BOEYE BDX'
    description = _('Communicate with BOEYE BDX series eBook readers.')
author = 'szboeye'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
VENDOR_ID = [0x0085]
PRODUCT_ID = [0x800]
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
OSX_CARD_A_MEM = 'Linux File-Stor Gadget Media'
MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BDX Internal Memory'
STORAGE_CARD_VOLUME_LABEL = 'BOEYE BDX Storage Card'
EBOOK_DIR_MAIN = 'Documents'
EBOOK_DIR_CARD_A = 'Documents'
SUPPORTS_SUB_DIRS = True


@ -64,7 +64,7 @@ class HANLINV3(USBMS):
return names
def linux_swap_drives(self, drives):
if len(drives) < 2: return drives
if len(drives) < 2 or not drives[1] or not drives[2]: return drives
drives = list(drives)
t = drives[0]
drives[0] = drives[1]
@ -95,7 +95,6 @@ class HANLINV5(HANLINV3):
gui_name = 'Hanlin V5'
description = _('Communicate with Hanlin V5 eBook readers.')
VENDOR_ID = [0x0492]
PRODUCT_ID = [0x8813]
BCD = [0x319]


@ -164,7 +164,7 @@ class APNXBuilder(object):
if c == '/':
closing = True
continue
elif c in ('d', 'p'):
elif c == 'p':
if closing:
in_p = False
else:


@ -187,7 +187,7 @@ class LUMIREAD(USBMS):
cfilepath = cfilepath.replace(os.sep+'books'+os.sep,
os.sep+'covers'+os.sep, 1)
pdir = os.path.dirname(cfilepath)
if not os.exists(pdir):
if not os.path.exists(pdir):
os.makedirs(pdir)
with open(cfilepath+'.jpg', 'wb') as f:
f.write(metadata.thumbnail[-1])


@ -94,6 +94,9 @@ class DeviceConfig(object):
if isinstance(cls.EXTRA_CUSTOMIZATION_MESSAGE, list):
ec = []
for i in range(0, len(cls.EXTRA_CUSTOMIZATION_MESSAGE)):
if config_widget.opt_extra_customization[i] is None:
ec.append(None)
continue
if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
ec.append(config_widget.opt_extra_customization[i].isChecked())
else:


@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.devices.usbms.driver import USBMS
class USER_DEFINED(USBMS):
name = 'User Defined USB driver'
gui_name = 'User Defined USB Device'
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub', 'mobi', 'pdf']
VENDOR_ID = 0xFFFF
PRODUCT_ID = 0xFFFF
BCD = None
EBOOK_DIR_MAIN = ''
EBOOK_DIR_CARD_A = ''
VENDOR_NAME = []
WINDOWS_MAIN_MEM = ''
WINDOWS_CARD_A_MEM = ''
OSX_MAIN_MEM = 'Device Main Memory'
MAIN_MEMORY_VOLUME_LABEL = 'Device Main Memory'
SUPPORTS_SUB_DIRS = True
EXTRA_CUSTOMIZATION_MESSAGE = [
_('USB Vendor ID (in hex)') + ':::<p>' +
_('Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('USB Product ID (in hex)')+ ':::<p>' +
_('Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('USB Revision ID (in hex)')+ ':::<p>' +
_('Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
'',
_('Windows main memory vendor string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Windows main memory ID string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Windows card A vendor string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Windows card A ID string') + ':::<p>' +
_('This field is used only on windows. '
'Get this ID using Preferences -> Misc -> Get information to '
'set up the user-defined device') + '</p>',
_('Main memory folder') + ':::<p>' +
_('Enter the folder where the books are to be stored. This folder '
'is prepended to any send_to_device template') + '</p>',
_('Card A folder') + ':::<p>' +
_('Enter the folder where the books are to be stored. This folder '
'is prepended to any send_to_device template') + '</p>',
]
EXTRA_CUSTOMIZATION_DEFAULT = [
'0xffff',
'0xffff',
'0xffff',
None,
'',
'',
'',
'',
'',
'',
]
OPT_USB_VENDOR_ID = 0
OPT_USB_PRODUCT_ID = 1
OPT_USB_REVISION_ID = 2
OPT_USB_WINDOWS_MM_VEN_ID = 4
OPT_USB_WINDOWS_MM_ID = 5
OPT_USB_WINDOWS_CA_VEN_ID = 6
OPT_USB_WINDOWS_CA_ID = 7
OPT_MAIN_MEM_FOLDER = 8
OPT_CARD_A_FOLDER = 9
def initialize(self):
try:
e = self.settings().extra_customization
self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
self.PRODUCT_ID = int(e[self.OPT_USB_PRODUCT_ID], 16)
self.BCD = [int(e[self.OPT_USB_REVISION_ID], 16)]
if e[self.OPT_USB_WINDOWS_MM_VEN_ID]:
self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_MM_VEN_ID])
if e[self.OPT_USB_WINDOWS_CA_VEN_ID] and \
e[self.OPT_USB_WINDOWS_CA_VEN_ID] not in self.VENDOR_NAME:
self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_CA_VEN_ID])
self.WINDOWS_MAIN_MEM = e[self.OPT_USB_WINDOWS_MM_ID] + '&'
self.WINDOWS_CARD_A_MEM = e[self.OPT_USB_WINDOWS_CA_ID] + '&'
self.EBOOK_DIR_MAIN = e[self.OPT_MAIN_MEM_FOLDER]
self.EBOOK_DIR_CARD_A = e[self.OPT_CARD_A_FOLDER]
except:
import traceback
traceback.print_exc()
USBMS.initialize(self)


@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin):
description = 'Convert CHM files to OEB'
file_types = set(['chm'])
def _chmtohtml(self, output_dir, chm_path, no_images, log):
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log, self.opts)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir)
rdr.extract_content(output_dir, debug_dump=debug_dump)
self._chm_reader = rdr
return rdr.hhc_path
@ -47,7 +47,12 @@ class CHMInput(InputFormatPlugin):
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
debug_dump = False
odi = options.debug_pipeline
if odi:
debug_dump = os.path.join(odi, 'input')
mainname = self._chmtohtml(tdir, chm_name, no_images, log,
debug_dump=debug_dump)
mainpath = os.path.join(tdir, mainname)
metadata = get_metadata_from_reader(self._chm_reader)
@ -56,7 +61,6 @@ class CHMInput(InputFormatPlugin):
#from calibre import ipython
#ipython()
odi = options.debug_pipeline
options.debug_pipeline = None
options.input_encoding = 'utf-8'
# try a custom conversion:


@ -97,7 +97,7 @@ class CHMReader(CHMFile):
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data
def ExtractFiles(self, output_dir=os.getcwdu()):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
for path in self.Contents():
lpath = os.path.join(output_dir, path)
@ -123,6 +123,9 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
for lpath in html_files:
with open(lpath, 'r+b') as f:
data = f.read()
@ -249,8 +252,8 @@ class CHMReader(CHMFile):
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)
def extract_content(self, output_dir=os.getcwdu(), debug_dump=False):
self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump)


@ -854,7 +854,8 @@ OptionRecommendation(name='sr3_replace',
if isinstance(ret, basestring):
shutil.copytree(output_dir, out_dir)
else:
os.makedirs(out_dir)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
self.dump_oeb(ret, out_dir)
if self.input_fmt == 'recipe':
zf = ZipFile(os.path.join(self.opts.debug_pipeline,


@ -402,7 +402,7 @@ class HTMLPreProcessor(object):
(re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
# Center separator lines
(re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
(re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group('break') + '</p>'),
# Remove page links
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),


@ -156,17 +156,17 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
r'(?msu)(?<=[\s>])/(?P<words>[^/\*>]+)/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_',
ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*>]+)/',
ur'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~',
ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_',
ur'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_',
ur'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*',
ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/',
ur'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|',
]
for word in ITALICIZE_WORDS:
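
The change above widens the lookbehind from [\s>] to [\s>"\'], so emphasis markup that sits immediately after a quotation mark is now recognized. A sketch trimmed to the underscore rule:

import re

OLD = r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_'
NEW = r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'
sample = u'"_quoted emphasis_" and plain _emphasis_'
print re.findall(OLD, sample)  # [u'emphasis'] - the quoted run is missed
print re.findall(NEW, sample)  # [u'quoted emphasis', u'emphasis']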
@ -518,13 +518,13 @@ class HeuristicProcessor(object):
if re.findall('(<|>)', replacement_break):
if re.match('^<hr', replacement_break):
if replacement_break.find('width') != -1:
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
elif re.match('^<img', replacement_break):
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
@ -584,10 +584,10 @@ class HeuristicProcessor(object):
#print "styles for this line are: "+str(styles)
split_styles = []
for style in styles:
#print "style is: "+str(style)
newstyle = style.split(':')
#print "newstyle is: "+str(newstyle)
split_styles.append(newstyle)
#print "style is: "+str(style)
newstyle = style.split(':')
#print "newstyle is: "+str(newstyle)
split_styles.append(newstyle)
styles = split_styles
for style, setting in styles:
if style == 'text-align' and setting != 'left':


@ -309,9 +309,9 @@ class HTMLInput(InputFormatPlugin):
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \
xpath
from calibre.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
xpath)
from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata
@ -345,7 +345,8 @@ class HTMLInput(InputFormatPlugin):
htmlfile_map = {}
for f in filelist:
path = f.path
oeb.container = DirContainer(os.path.dirname(path), log)
oeb.container = DirContainer(os.path.dirname(path), log,
ignore_opf=True)
bname = os.path.basename(path)
id, href = oeb.manifest.generate(id='html',
href=ascii_filename(bname))
@ -369,7 +370,7 @@ class HTMLInput(InputFormatPlugin):
for f in filelist:
path = f.path
dpath = os.path.dirname(path)
oeb.container = DirContainer(dpath, log)
oeb.container = DirContainer(dpath, log, ignore_opf=True)
item = oeb.manifest.hrefs[htmlfile_map[path]]
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
@ -409,7 +410,7 @@ class HTMLInput(InputFormatPlugin):
if not item.linear: continue
toc.add(title, item.href)
oeb.container = DirContainer(os.getcwdu(), oeb.log)
oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
return oeb
def link_to_local_path(self, link_, base=None):
@ -456,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
href=bhref)
self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log)
self.oeb.log, ignore_opf=True)
# Load into memory
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME


@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import posixpath
from calibre import walk
from calibre import guess_type, walk
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.zipfile import ZipFile
class HTMLZInput(InputFormatPlugin):
@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin):
# Extract content from zip archive.
zf = ZipFile(stream)
zf.extractall('.')
zf.extractall()
for x in walk('.'):
if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin):
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext)
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
# Get the cover path from the OPF.
cover_href = None
opf = None
for x in walk('.'):
        if os.path.splitext(x)[1].lower() == '.opf':
opf = x
break
if opf:
opf = OPF(opf)
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
# Set the cover.
if cover_href:
cdata = None
with open(cover_href, 'rb') as cf:
cdata = cf.read()
id, href = oeb.manifest.generate('cover', cover_href)
oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
oeb.guide.add('cover', 'Cover', href)
return oeb


@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from cStringIO import StringIO
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin):
fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img:
img.write(data)
# Cover
cover_path = None
try:
cover_data = None
if oeb_book.metadata.cover:
term = oeb_book.metadata.cover[0].term
cover_data = oeb_book.guide[term].item.data
if cover_data:
from calibre.utils.magick.draw import save_cover_data_to
cover_path = os.path.join(tdir, 'cover.jpg')
with open(cover_path, 'w') as cf:
cf.write('')
save_cover_data_to(cover_data, cover_path)
except:
import traceback
traceback.print_exc()
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
mi = opf.to_book_metadata()
if cover_path:
mi.cover = 'cover.jpg'
mdataf.write(metadata_to_opf(mi))
htmlz = ZipFile(output_path, 'w')
htmlz.add_dir(tdir)


@ -274,6 +274,9 @@ def check_isbn(isbn):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
all_same = re.match(r'(\d)\1{9,12}$', isbn)
if all_same is not None:
return None
if len(isbn) == 10:
return check_isbn10(isbn)
if len(isbn) == 13:
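
The added guard rejects a string that is just one digit repeated ten to thirteen times before any checksum is attempted. A quick sketch with illustrative values:

import re

def all_same_digits(isbn):
    # Mirrors the added check: a single repeated digit is never a valid ISBN.
    return re.match(r'(\d)\1{9,12}$', isbn) is not None

print all_same_digits('1111111111')     # True - rejected early
print all_same_digits('9780316044981')  # False - goes on to the length-specific checks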


@ -1,224 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Fetch metadata using Amazon AWS
'''
import sys, re
from threading import RLock
from lxml import html
from lxml.html import soupparser
from calibre import browser
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
asin_cache = {}
cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn):
q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
res = br.open_novisit(q)
raw = res.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
root = html.fromstring(raw)
revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
revs = [x.get('name') for x in revs]
if revs:
return revs[0]
def to_asin(br, isbn):
with cache_lock:
ans = asin_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
if len(isbn) == 13:
try:
asin = find_asin(br, isbn)
except:
import traceback
traceback.print_exc()
asin = None
else:
asin = isbn
with cache_lock:
asin_cache[isbn] = asin if asin else False
return asin
def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors)
if not isbn:
return mi
isbn = check_isbn(isbn)
if not isbn:
return mi
br = browser()
asin = to_asin(br, isbn)
if asin and get_metadata(br, asin, mi):
return mi
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin and get_metadata(br, asin, mi):
return mi
return mi
def get_cover_url(isbn, br):
isbn = check_isbn(isbn)
if not isbn:
return None
with cache_lock:
ans = cover_url_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
asin = to_asin(br, isbn)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
return ans
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
cover_url_cache[i] = ans
return ans
with cache_lock:
cover_url_cache[isbn] = False
return None
def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
raise
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
imgs = root.xpath('//img[@id="prodImage" and @src]')
if imgs:
src = imgs[0].get('src')
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
bn = sparts[0] + sparts[-1]
return ('/'.join(parts[:-1]))+'/'+bn
return None
def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return False
raise
if '<title>404 - ' in raw:
return False
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
if root.xpath('//*[@id="errorMessage"]'):
return False
ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
if ratings:
for elem in ratings[0].xpath('descendant::*[@title]'):
t = elem.get('title').strip()
m = pat.match(t)
if m is not None:
try:
mi.rating = float(m.group(1))/float(m.group(2)) * 5
except:
pass
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
if desc:
desc = desc[0]
for c in desc.xpath('descendant::*[@class="seeAll" or'
' @class="emptyClear" or @href]'):
c.getparent().remove(c)
desc = html.tostring(desc, method='html', encoding=unicode).strip()
# remove all attributes from tags
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
# Collapse whitespace
#desc = re.sub('\n+', '\n', desc)
#desc = re.sub(' +', ' ', desc)
# Remove the notice about text referring to out of print editions
desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
# Remove comments
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
mi.comments = sanitize_comments_html(desc)
return True
def main(args=sys.argv):
import tempfile, os
tdir = tempfile.gettempdir()
br = browser()
for title, isbn in [
('The Heroes', '9780316044981'), # Test find_asin
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
('The Great Gatsby', '0743273567'),
]:
cpath = os.path.join(tdir, title+'.jpg')
curl = get_cover_url(isbn, br)
if curl is None:
print 'No cover found for', title
else:
open(cpath, 'wb').write(br.open_novisit(curl).read())
print 'Cover for', title, 'saved to', cpath
#import time
#st = time.time()
mi = get_social_metadata(title, None, None, isbn)
if not mi.comments:
            print 'Failed to download social metadata for', title
return 1
#print '\n\n', time.time() - st, '\n\n'
print mi
print '\n'
return 0
if __name__ == '__main__':
sys.exit(main())


@ -1,516 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
import sys, textwrap, re, traceback
from urllib import urlencode
from math import ceil
from lxml import html
from lxml.html import soupparser
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html
class AmazonFr(MetadataSource):
name = 'Amazon French'
description = _('Downloads metadata from amazon.fr')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='fr')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class AmazonEs(MetadataSource):
name = 'Amazon Spanish'
    description = _('Downloads metadata from amazon.com in Spanish')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='es')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class AmazonEn(MetadataSource):
name = 'Amazon English'
    description = _('Downloads metadata from amazon.com in English')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='en')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class AmazonDe(MetadataSource):
name = 'Amazon German'
description = _('Downloads metadata from amazon.de')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='de')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class Amazon(MetadataSource):
name = 'Amazon'
description = _('Downloads metadata from amazon.com')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal & Sengian'
version = (1, 1, 0)
has_html_comments = True
def fetch(self):
# if not self.site_customization:
# return
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='all')
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# @property
# def string_customization_help(self):
# return _('You can select here the language for metadata search with amazon.com')
def report(verbose):
if verbose:
traceback.print_exc()
class Query(object):
BASE_URL_ALL = 'http://www.amazon.com'
BASE_URL_FR = 'http://www.amazon.fr'
BASE_URL_DE = 'http://www.amazon.de'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
max_results=20, rlang='all'):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
assert (max_results < 21)
self.max_results = int(max_results)
self.renbres = re.compile(u'\s*(\d+)\s*')
q = { 'search-alias' : 'stripbooks' ,
'unfiltered' : '1',
'field-keywords' : '',
'field-author' : '',
'field-title' : '',
'field-isbn' : '',
'field-publisher' : ''
#get to amazon detailed search page to get all options
# 'node' : '',
# 'field-binding' : '',
#before, during, after
# 'field-dateop' : '',
#month as number
# 'field-datemod' : '',
# 'field-dateyear' : '',
#french only
# 'field-collection' : '',
#many options available
}
if rlang =='all':
q['sort'] = 'relevanceexprank'
self.urldata = self.BASE_URL_ALL
elif rlang =='es':
q['sort'] = 'relevanceexprank'
q['field-language'] = 'Spanish'
self.urldata = self.BASE_URL_ALL
elif rlang =='en':
q['sort'] = 'relevanceexprank'
q['field-language'] = 'English'
self.urldata = self.BASE_URL_ALL
elif rlang =='fr':
q['sort'] = 'relevancerank'
self.urldata = self.BASE_URL_FR
elif rlang =='de':
q['sort'] = 'relevancerank'
self.urldata = self.BASE_URL_DE
self.baseurl = self.urldata
if isbn is not None:
q['field-isbn'] = isbn.replace('-', '')
else:
if title is not None:
q['field-title'] = title
if author is not None:
q['field-author'] = author
if publisher is not None:
q['field-publisher'] = publisher
if keywords is not None:
q['field-keywords'] = keywords
if isinstance(q, unicode):
q = q.encode('utf-8')
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print 'Query:', self.urldata
try:
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
return None, self.urldata
#nb of page
try:
nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
except:
return None, self.urldata
pages =[feed]
if len(nbresults) > 1:
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
for i in xrange(2, nbpagetoquery + 1):
try:
urldata = self.urldata + '&page=' + str(i)
raw = browser.open_novisit(urldata, timeout=timeout).read()
except Exception as e:
continue
if '<title>404 - ' in raw:
continue
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
continue
pages.append(feed)
results = []
for x in pages:
results.extend([i.getparent().get('href') \
for i in x.xpath("//a/span[@class='srTitle']")])
return results[:self.max_results], self.baseurl
class ResultList(list):
def __init__(self, baseurl, lang = 'all'):
self.baseurl = baseurl
self.lang = lang
self.repub = re.compile(u'\((.*)\)')
self.rerat = re.compile(u'([0-9.]+)')
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
self.recom = re.compile(r'(?s)<!--.*?-->')
self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
self.reisbn = re.compile(u'(ISBN-10|ISBN-13|ASIN)', re.I)
self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
def strip_tags_etree(self, etreeobj, invalid_tags):
for (itag, rmv) in invalid_tags.iteritems():
if rmv:
for elts in etreeobj.getiterator(itag):
elts.drop_tree()
else:
for elts in etreeobj.getiterator(itag):
elts.drop_tag()
def clean_entry(self, entry, invalid_tags = {'script': True},
invalid_id = (), invalid_class=()):
#invalid_tags: remove tag and keep content if False else remove
#remove tags
if invalid_tags:
self.strip_tags_etree(entry, invalid_tags)
#remove id
if invalid_id:
for eltid in invalid_id:
elt = entry.get_element_by_id(eltid)
if elt is not None:
elt.drop_tree()
#remove class
if invalid_class:
for eltclass in invalid_class:
elts = entry.find_class(eltclass)
if elts is not None:
for elt in elts:
elt.drop_tree()
def get_title(self, entry):
title = entry.get_element_by_id('btAsinTitle')
if title is not None:
title = title.text
return unicode(title.replace('\n', '').strip())
def get_authors(self, entry):
author = entry.get_element_by_id('btAsinTitle')
while author.getparent().tag != 'div':
author = author.getparent()
author = author.getparent()
authortext = []
for x in author.getiterator('a'):
authortext.append(unicode(x.text_content().strip()))
return authortext
def get_description(self, entry, verbose):
try:
description = entry.get_element_by_id("productDescription").find("div[@class='content']")
inv_class = ('seeAll', 'emptyClear')
inv_tags ={'img': True, 'a': False}
self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
description = html.tostring(description, method='html', encoding=unicode).strip()
# remove all attributes from tags
description = self.reattr.sub(r'<\1>', description)
# Remove the notice about text referring to out of print editions
description = self.reoutp.sub('', description)
# Remove comments
description = self.recom.sub('', description)
return unicode(sanitize_comments_html(description))
except:
report(verbose)
return None
def get_tags(self, entry, browser, verbose):
try:
tags = entry.get_element_by_id('tagContentHolder')
testptag = tags.find_class('see-all')
if testptag:
for x in testptag:
alink = x.xpath('descendant-or-self::a')
if alink:
if alink[0].get('class') == 'tgJsActive':
continue
link = self.baseurl + alink[0].get('href')
entry = self.get_individual_metadata(browser, link, verbose)
tags = entry.get_element_by_id('tagContentHolder')
break
tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
except:
report(verbose)
tags = []
return tags
def get_book_info(self, entry, mi, verbose):
try:
entry = entry.get_element_by_id('SalesRank').getparent()
except:
try:
for z in entry.getiterator('h2'):
if self.reprod.search(z.text_content()):
entry = z.getparent().find("div[@class='content']/ul")
break
except:
report(verbose)
return mi
elts = entry.findall('li')
#pub & date
elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
if elt:
pub = elt[0].find('b').tail
mi.publisher = unicode(self.repub.sub('', pub).strip())
d = self.repub.search(pub)
if d is not None:
d = d.group(1)
try:
default = utcnow().replace(day=15)
if self.lang != 'all':
d = replace_months(d, self.lang)
d = parse_date(d, assume_utc=True, default=default)
mi.pubdate = d
except:
report(verbose)
#ISBN
elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
if elt:
isbn = elt[0].find('b').tail.replace('-', '').strip()
if check_isbn(isbn):
mi.isbn = unicode(isbn)
elif len(elt) > 1:
isbn = elt[1].find('b').tail.replace('-', '').strip()
if check_isbn(isbn):
mi.isbn = unicode(isbn)
#Langue
elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
if elt:
langue = elt[0].find('b').tail.strip()
if langue:
mi.language = unicode(langue)
#ratings
elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
if elt:
ratings = elt[0].find_class('swSprite')
if ratings:
ratings = self.rerat.findall(ratings[0].get('title'))
if len(ratings) == 2:
mi.rating = float(ratings[0])/float(ratings[1]) * 5
return mi
def fill_MI(self, entry, title, authors, browser, verbose):
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
mi = self.get_book_info(entry, mi, verbose)
mi.tags = self.get_tags(entry, browser, verbose)
return mi
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(linkdata).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
raise
if '<title>404 - ' in raw:
report(verbose)
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
report(verbose)
return
def populate(self, entries, browser, verbose=False):
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
# clean results
# inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
# inv_class = ('buyingDetailsGrid', 'productImageGrid')
# inv_tags ={'script': True, 'style': True, 'form': False}
# self.clean_entry(entry, invalid_id=inv_ids)
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
print 'URL that failed:', x
report(verbose)
continue
self.append(self.fill_MI(entry, title, authors, browser, verbose))
def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None, lang='all'):
br = browser()
entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
if entries is None or len(entries) == 0:
return
#List of entries
ans = ResultList(baseurl, lang)
ans.populate(entries, br, verbose)
return ans
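#A minimal usage sketch (illustrative title/author; performs a live
#Amazon query when run with calibre's dependencies available):
#
# results = search(title='Dune', author='Frank Herbert',
#                  max_results=5, verbose=True, lang='en')
# if results is not None:
#     for mi in results:
#         print unicode(mi).encode(preferred_encoding, 'replace')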
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
Fetch book metadata from Amazon. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
so you should make your query as specific as possible.
You can choose the language for metadata retrieval:
all, english, french, german or spanish
'''
)))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-k', '--keywords', help='Keywords')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-l', '--lang', default='all',
help='Chosen language for metadata search (all, en, fr, es, de)')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
lang=opts.lang)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print 'No result found for this search!'
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
sys.exit(main())


@ -68,7 +68,19 @@ composite_formatter = SafeFormat()
class Metadata(object):
'''
A class representing all the metadata for a book.
A class representing all the metadata for a book. The various standard metadata
fields are available as attributes of this object. You can also stick
arbitrary attributes onto this object.
Metadata from custom columns should be accessed via the get() method,
passing in the lookup name for the column, for example: "#mytags".
Use the :meth:`is_null` method to test if a field is null.
This object also has functions to format fields into strings.
The list of standard metadata fields, which grows over time, is in
:data:`STANDARD_METADATA_FIELDS`.
Please keep the method based API of this class to a minimum. Every method
becomes a reserved field name.
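For example (illustrative values)::

    mi = Metadata('The Dispossessed', ['Ursula K. Le Guin'])
    mi.publisher = 'Harper & Row'   # standard field, as an attribute
    mytags = mi.get('#mytags')      # custom column, via get()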
@ -88,11 +100,19 @@ class Metadata(object):
if title:
self.title = title
if authors:
#: List of strings or []
# List of strings or []
self.author = list(authors) if authors else []  # Needed for backward compatibility
self.authors = list(authors) if authors else []
def is_null(self, field):
'''
Return True if the value of field is null in this object.
'null' means it is unknown or evaluates to False. So a title of
_('Unknown') is null or a language of 'und' is null.
Be careful with numeric fields since this will return True for zero as
well as None.
'''
null_val = NULL_VALUES.get(field, None)
val = getattr(self, field, None)
return not val or val == null_val
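#For example (values chosen to show the caveats above):
#
# mi = Metadata(_('Unknown'))
# mi.is_null('title')   # True: _('Unknown') is the null value for titles
# mi.rating = 0
# mi.is_null('rating')  # True: zero is falsy, per the numeric caveat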
@ -120,7 +140,11 @@ class Metadata(object):
_('TEMPLATE ERROR'),
self).strip()
return val
if field.startswith('#') and field.endswith('_index'):
try:
return self.get_extra(field[:-6])
except:
pass
raise AttributeError(
'Metadata object has no attribute named: '+ repr(field))
@ -170,11 +194,6 @@ class Metadata(object):
try:
return self.__getattribute__(field)
except AttributeError:
if field.startswith('#') and field.endswith('_index'):
try:
return self.get_extra(field[:-6])
except:
pass
return default
def get_extra(self, field, default=None):
@ -544,17 +563,24 @@ class Metadata(object):
def format_tags(self):
return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self):
return unicode(self.rating)
def format_rating(self, v=None, divide_by=1.0):
if v is None:
if self.rating is not None:
return unicode(self.rating/divide_by)
return u'None'
return unicode(v/divide_by)
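#For example, with the 0-10 scale calibre stores internally (illustrative value):
#
# mi.rating = 8
# mi.format_rating(divide_by=2.0)  # -> u'4.0', i.e. a rating out of 5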
def format_field(self, key, series_with_index=True):
'''
Returns the tuple (display_name, formatted_value)
'''
name, val, ign, ign = self.format_field_extended(key, series_with_index)
return (name, val)
def format_field_extended(self, key, series_with_index=True):
from calibre.ebooks.metadata import authors_to_string
'''
returns the tuple (field_name, formatted_value, original_value,
returns the tuple (display_name, formatted_value, original_value,
field_metadata)
'''
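#Sketch of the difference between the two calls (illustrative values,
#assuming mi.rating == 8 on the internal 0-10 scale):
#
# mi.format_field('rating')           # -> (u'Rating', u'4.0')
# mi.format_field_extended('rating')  # -> (u'Rating', u'4.0', 8, fmeta)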
@ -631,13 +657,17 @@ class Metadata(object):
res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
elif datatype == 'rating':
res = res/2.0
elif key in ('book_size', 'size'):
elif key == 'size':
res = human_readable(res)
return (name, unicode(res), orig_res, fmeta)
return (None, None, None, None)
def __unicode__(self):
'''
A string representation of this object, suitable for printing to
console
'''
from calibre.ebooks.metadata import authors_to_string
ans = []
def fmt(x, y):
@ -681,6 +711,9 @@ class Metadata(object):
return u'\n'.join(ans)
def to_html(self):
'''
An HTML representation of this object.
'''
from calibre.ebooks.metadata import authors_to_string
ans = [(_('Title'), unicode(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]


@ -1,317 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
from lxml import etree
import mechanize
from calibre.customize import Plugin
from calibre import browser, prints
from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin):
'''
These plugins are used to download covers for books.
'''
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal'
type = _('Cover download')
def has_cover(self, mi, ans, timeout=5.):
'''
Check if the book described by mi has a cover. Call ans.set() if it
does. Do nothing if it doesn't.
:param mi: MetaInformation object
:param timeout: timeout in seconds
:param ans: A threading.Event object
'''
raise NotImplementedError()
def get_covers(self, mi, result_queue, abort, timeout=5.):
'''
Download covers for books described by the mi object. Downloaded covers
must be put into the result_queue. If more than one cover is available,
the plugin should continue downloading them and putting them into
result_queue until abort.is_set() returns True.
:param mi: MetaInformation object
:param result_queue: A multithreaded Queue
:param abort: A threading.Event object
:param timeout: timeout in seconds
'''
raise NotImplementedError()
def exception_to_string(self, ex):
try:
return unicode(ex)
except:
try:
return str(ex).decode(preferred_encoding, 'replace')
except:
return repr(ex)
def debug(self, *args, **kwargs):
if DEBUG:
prints('\t'+self.name+':', *args, **kwargs)
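#A minimal plugin sketch following the contract above (the name and the
#hard-coded behaviour are placeholders, not a real cover source):
#
# class ExampleCovers(CoverDownload):
#     name = 'example covers'
#     description = _('Download covers from example.org')
#     author = 'Nobody'
#
#     def has_cover(self, mi, ans, timeout=5.):
#         if mi.isbn:
#             ans.set()  # pretend a cover exists for every ISBN
#
#     def get_covers(self, mi, result_queue, abort, timeout=5.):
#         if mi.isbn and not abort.is_set():
#             data = 'JPEG bytes would go here'
#             result_queue.put((True, data, 'jpg', self.name))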
class HeadRequest(mechanize.Request):
def get_method(self):
return 'HEAD'
class OpenLibraryCovers(CoverDownload): # {{{
'Download covers from openlibrary.org'
# See http://openlibrary.org/dev/docs/api/covers
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
name = 'openlibrary.org covers'
description = _('Download covers from openlibrary.org')
author = 'Kovid Goyal'
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
from calibre.ebooks.metadata.library_thing import get_browser
br = get_browser()
br.set_handle_redirect(False)
try:
br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
self.debug('cover for', mi.isbn, 'found')
ans.set()
else:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
from calibre.ebooks.metadata.library_thing import get_browser
br = get_browser()
try:
ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
result_queue.put((True, ans, 'jpg', self.name))
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name))
else:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
class AmazonCovers(CoverDownload): # {{{
name = 'amazon.com covers'
description = _('Download covers from amazon.com')
author = 'Kovid Goyal'
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
url = get_cover_url(mi.isbn, br)
if url is None:
raise ValueError('No cover found for ISBN: %s'%mi.isbn)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
def check_for_cover(mi, timeout=5.): # {{{
from calibre.customize.ui import cover_sources
ans = Event()
checkers = [partial(p.has_cover, mi, ans, timeout=timeout) for p in
cover_sources()]
workers = [Thread(target=c) for c in checkers]
for w in workers:
w.daemon = True
w.start()
while not ans.is_set():
ans.wait(0.1)
if sum([int(w.is_alive()) for w in workers]) == 0:
break
return ans.is_set()
# }}}
def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{
from calibre.customize.ui import cover_sources
abort = Event()
temp = Queue()
getters = [partial(p.get_covers, mi, temp, abort, timeout=timeout) for p in
cover_sources()]
workers = [Thread(target=c) for c in getters]
for w in workers:
w.daemon = True
w.start()
count = 0
while count < max_covers:
try:
result = temp.get_nowait()
if result[0]:
count += 1
result_queue.put(result)
except Empty:
pass
if sum([int(w.is_alive()) for w in workers]) == 0:
break
abort.set()
while True:
try:
result = temp.get_nowait()
count += 1
result_queue.put(result)
except Empty:
break
# }}}
class DoubanCovers(CoverDownload): # {{{
'Download covers from Douban.com'
DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
name = 'Douban.com covers'
description = _('Download covers from Douban.com')
author = 'Li Fanxi'
def get_cover_url(self, isbn, br, timeout=5.):
try:
url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
src = br.open(url, timeout=timeout).read()
except Exception as err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('Douban.com API timed out. Try again later.'))
raise err
else:
feed = etree.fromstring(src)
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
entries = XPath('//atom:entry')(feed)
if len(entries) < 1:
return None
try:
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
u = cover_url(entries[0])[0].replace('/spic/', '/lpic/')
# If URL contains "book-default", the book doesn't have a cover
if u.find('book-default') != -1:
return None
except:
return None
return u
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
br = browser()
try:
if self.get_cover_url(mi.isbn, br, timeout=timeout) is not None:
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
br = browser()
try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
def download_cover(mi, timeout=5.): # {{{
results = Queue()
download_covers(mi, results, max_covers=1, timeout=timeout)
errors, ans = [], None
while True:
try:
x = results.get_nowait()
if x[0]:
ans = x[1]
else:
errors.append(x)
except Empty:
break
return ans, errors
# }}}
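#Synchronous usage sketch (illustrative book; the ISBN is an example value):
#
# mi = MetaInformation('Dune', ['Frank Herbert'])
# mi.isbn = '9780441013593'
# cover_data, errors = download_cover(mi)
# if cover_data is None:
#     for e in errors:
#         print e[1]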
def test(isbns): # {{{
from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation('test', ['test'])
for isbn in isbns:
prints('Testing ISBN:', isbn)
mi.isbn = isbn
found = check_for_cover(mi)
prints('Has cover:', found)
ans, errors = download_cover(mi)
if ans is not None:
prints('Cover downloaded')
else:
prints('Download failed:')
for err in errors:
prints('\t', err[-1]+':', err[1])
print '\n'
# }}}
if __name__ == '__main__':
isbns = sys.argv[1:] + ['9781591025412', '9780307272119']
#test(isbns)
from calibre.ebooks.metadata import MetaInformation
oc = OpenLibraryCovers(None)
for isbn in isbns:
mi = MetaInformation('xx', ['yy'])
mi.isbn = isbn
rq = Queue()
oc.get_covers(mi, rq, Event())
result = rq.get_nowait()
if not result[0]:
print 'Failed for ISBN:', isbn
print result


@ -1,263 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>; 2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap
import traceback
from urllib import urlencode
from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")
CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
class DoubanBooks(MetadataSource):
name = 'Douban Books'
description = _('Downloads metadata from Douban.com')
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
version = (1, 0, 1) # The version number of this plugin
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
def report(verbose):
if verbose:
import traceback
traceback.print_exc()
class Query(object):
SEARCH_URL = 'http://api.douban.com/book/subjects?'
ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
type = "search"
def __init__(self, title=None, author=None, publisher=None, isbn=None,
max_results=20, start_index=1, api_key=''):
assert not(title is None and author is None and publisher is None and \
isbn is None)
assert (int(max_results) < 21)
q = ''
if isbn is not None:
q = isbn
self.type = 'isbn'
else:
def build_term(parts):
return ' '.join(x for x in parts)
if title is not None:
q += build_term(title.split())
if author is not None:
q += (' ' if q else '') + build_term(author.split())
if publisher is not None:
q += (' ' if q else '') + build_term(publisher.split())
self.type = 'search'
if isinstance(q, unicode):
q = q.encode('utf-8')
if self.type == "isbn":
self.url = self.ISBN_URL + q
if api_key != '':
self.url = self.url + "?apikey=" + api_key
else:
self.url = self.SEARCH_URL+urlencode({
'q':q,
'max-results':max_results,
'start-index':start_index,
})
if api_key != '':
self.url = self.url + "&apikey=" + api_key
def __call__(self, browser, verbose):
if verbose:
print 'Query:', self.url
if self.type == "search":
feed = etree.fromstring(browser.open(self.url).read())
total = int(total_results(feed)[0].text)
start = int(start_index(feed)[0].text)
entries = entry(feed)
new_start = start + len(entries)
if new_start > total:
new_start = 0
return entries, new_start
elif self.type == "isbn":
feed = etree.fromstring(browser.open(self.url).read())
entries = entry(feed)
return entries, 0
class ResultList(list):
def get_description(self, entry, verbose):
try:
desc = description(entry)
if desc:
return 'SUMMARY:\n'+desc[0].text
except:
report(verbose)
def get_title(self, entry):
candidates = [x.text for x in title(entry)]
return ': '.join(candidates)
def get_authors(self, entry):
m = creator(entry)
if not m:
m = []
m = [x.text for x in m]
return m
def get_tags(self, entry, verbose):
try:
btags = [x.attrib["name"] for x in tag(entry)]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = list(sorted(list(set(tags))))
except:
report(verbose)
tags = []
return [x.replace(',', ';') for x in tags]
def get_publisher(self, entry, verbose):
try:
pub = publisher(entry)[0].text
except:
pub = None
return pub
def get_isbn(self, entry, verbose):
try:
isbn13 = isbn(entry)[0].text
except Exception:
isbn13 = None
return isbn13
def get_date(self, entry, verbose):
try:
d = date(entry)
if d:
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def populate(self, entries, browser, verbose=False, api_key=''):
for x in entries:
try:
id_url = entry_id(x)[0].text
title = self.get_title(x)
except:
report(verbose)
continue
mi = MetaInformation(title, self.get_authors(x))
try:
if api_key != '':
id_url = id_url + "?apikey=" + api_key
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
mi.comments = self.get_description(x, verbose)
mi.tags = self.get_tags(x, verbose)
mi.isbn = self.get_isbn(x, verbose)
mi.publisher = self.get_publisher(x, verbose)
mi.pubdate = self.get_date(x, verbose)
self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
verbose=False, max_results=40, api_key=None):
br = browser()
start, entries = 1, []
if api_key is None:
api_key = CALIBRE_DOUBAN_API_KEY
while start > 0 and len(entries) <= max_results:
new, start = Query(title=title, author=author, publisher=publisher,
isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
if not new:
break
entries.extend(new)
entries = entries[:max_results]
ans = ResultList()
ans.populate(entries, br, verbose, api_key)
return ans
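#Usage sketch (the ISBN is only an example value; performs live queries
#against the Douban API):
#
# results = search(isbn='9787532754688', verbose=True, max_results=5)
# for mi in results:
#     print unicode(mi)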
def option_parser():
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
Fetch book metadata from Douban. You must specify one of title, author,
publisher or ISBN. If you specify ISBN the others are ignored. Will
fetch a maximum of 100 matches, so you should make your query as
specific as possible.
'''
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, opts.publisher, opts.isbn,
verbose=opts.verbose, max_results=int(opts.max_results))
except AssertionError:
report(True)
parser.print_help()
return 1
for result in results:
print unicode(result).encode(preferred_encoding)
print
if __name__ == '__main__':
sys.exit(main())


@ -13,7 +13,7 @@ import posixpath
from cStringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace
@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True):
opf = OPF(opf_stream)
mi = opf.to_book_metadata()
if extract_cover:
cover_name = opf.raster_cover
if cover_name:
mi.cover_data = ('jpg', zf.read(cover_name))
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
if cover_href:
mi.cover_data = ('jpg', zf.read(cover_href))
except:
return mi
return mi
@ -59,17 +59,20 @@ def set_metadata(stream, mi):
except:
pass
if new_cdata:
raster_cover = opf.raster_cover
if not raster_cover:
raster_cover = 'cover.jpg'
cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
cover = opf.cover
if not cover:
cover = 'cover.jpg'
cpath = posixpath.join(posixpath.dirname(opf_path), cover)
new_cover = _write_new_cover(new_cdata, cpath)
replacements[cpath] = open(new_cover.name, 'rb')
mi.cover = cover
# Update the metadata.
opf.smart_update(mi, replace_metadata=True)
old_mi = opf.to_book_metadata()
old_mi.smart_update(mi)
opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
newopf = StringIO(opf.render())
safe_replace(stream, opf_path, newopf, extra_replacements=replacements)
safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
# Cleanup temporary files.
try:


@ -1,523 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, sys, textwrap, re
from threading import Thread
from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.utils.titlecase import titlecase
from calibre.customize import Plugin
from calibre.ebooks.metadata.covers import check_for_cover
from calibre.utils.html2text import html2text
metadata_config = None
class MetadataSource(Plugin): # {{{
'''
Represents a source to query for metadata. Subclasses must implement
at least the fetch method.
When :meth:`fetch` is called, the `self` object will have the following
useful attributes (each of which may be None)::
title, book_author, publisher, isbn, log, verbose and extra
Use these attributes to construct the search query. extra is reserved for
future use.
The fetch method must store the results in `self.results` as a list of
:class:`Metadata` objects. If there is an error, it should be stored
in `self.exception` and `self.tb` (for the traceback).
'''
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
#: The type of metadata fetched. 'basic' means basic metadata like
#: title/author/isbn/etc. 'social' means social metadata like
#: tags/rating/reviews/etc.
metadata_type = 'basic'
#: If not None, the customization dialog will allow for string
#: based customization as well the default customization. The
#: string customization will be saved in the site_customization
#: member.
string_customization_help = None
#: Set this to true if your plugin returns HTML markup in comments.
#: Then if the user disables HTML, calibre will automagically convert
#: the HTML to Markdown.
has_html_comments = False
type = _('Metadata download')
def __call__(self, title, author, publisher, isbn, verbose, log=None,
extra=None):
self.worker = Thread(target=self._fetch)
self.worker.daemon = True
self.title = title
self.verbose = verbose
self.book_author = author
self.publisher = publisher
self.isbn = isbn
self.log = log if log is not None else default_log
self.extra = extra
self.exception, self.tb, self.results = None, None, []
self.worker.start()
def _fetch(self):
try:
self.fetch()
if self.results:
c = self.config_store().get(self.name, {})
res = self.results
if hasattr(res, 'authors'):
res = [res]
for mi in res:
if not c.get('rating', True):
mi.rating = None
if not c.get('comments', True):
mi.comments = None
if not c.get('tags', True):
mi.tags = []
if self.has_html_comments and mi.comments and \
c.get('textcomments', False):
try:
mi.comments = html2text(mi.comments)
except:
traceback.print_exc()
mi.comments = None
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
def fetch(self):
'''
All the actual work is done here.
'''
raise NotImplementedError
def join(self):
return self.worker.join()
def is_alive(self):
return self.worker.is_alive()
def is_customizable(self):
return True
def config_store(self):
global metadata_config
if metadata_config is None:
from calibre.utils.config import XMLConfig
metadata_config = XMLConfig('plugins/metadata_download')
return metadata_config
def config_widget(self):
from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
QCheckBox
from calibre.customize.ui import config
w = QWidget()
w._layout = QVBoxLayout(w)
w.setLayout(w._layout)
if self.string_customization_help is not None:
w._sc_label = QLabel(self.string_customization_help, w)
w._layout.addWidget(w._sc_label)
customization = config['plugin_customization']
def_sc = customization.get(self.name, '')
if not def_sc:
def_sc = ''
w._sc = QLineEdit(def_sc, w)
w._layout.addWidget(w._sc)
w._sc_label.setWordWrap(True)
w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
| Qt.LinksAccessibleByKeyboard)
w._sc_label.setOpenExternalLinks(True)
c = self.config_store()
c = c.get(self.name, {})
for x, l in {'rating':_('ratings'), 'tags':_('tags'),
'comments':_('description/reviews')}.items():
cb = QCheckBox(_('Download %s from %s')%(l,
self.name))
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
if self.has_html_comments:
cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
setattr(w, '_textcomments', cb)
cb.setChecked(c.get('textcomments', False))
w._layout.addWidget(cb)
return w
def save_settings(self, w):
dl_settings = {}
for x in ('rating', 'tags', 'comments'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
if self.has_html_comments:
dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
if hasattr(w, '_sc'):
sc = unicode(w._sc.text()).strip()
from calibre.customize.ui import customize_plugin
customize_plugin(self, sc)
def customization_help(self):
return 'This plugin can only be customized using the GUI'
# }}}
class GoogleBooks(MetadataSource): # {{{
name = 'Google Books'
description = _('Downloads metadata from Google Books')
def fetch(self):
from calibre.ebooks.metadata.google_books import search
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
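#Invocation sketch, valid for any MetadataSource (fetch runs on a worker
#thread; see __call__ above; the title/author values are illustrative):
#
# src = GoogleBooks(None)
# src('Dune', 'Frank Herbert', None, None, verbose=1)
# src.join()
# if src.exception is None:
#     for mi in src.results:
#         print unicode(mi)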
class ISBNDB(MetadataSource): # {{{
name = 'IsbnDB'
description = _('Downloads metadata from isbndb.com')
def fetch(self):
if not self.site_customization:
return
from calibre.ebooks.metadata.isbndb import option_parser, create_books
args = ['isbndb']
if self.isbn:
args.extend(['--isbn', self.isbn])
else:
if self.title:
args.extend(['--title', self.title])
if self.book_author:
args.extend(['--author', self.book_author])
if self.publisher:
args.extend(['--publisher', self.publisher])
if self.verbose:
args.extend(['--verbose'])
args.append(self.site_customization) # IsbnDb key
try:
opts, args = option_parser().parse_args(args)
self.results = create_books(opts, args)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.')
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
# }}}
class Amazon(MetadataSource): # {{{
name = 'Amazon'
metadata_type = 'social'
description = _('Downloads social metadata from amazon.com')
has_html_comments = True
def fetch(self):
if not self.isbn:
return
from calibre.ebooks.metadata.amazon import get_social_metadata
try:
self.results = get_social_metadata(self.title, self.book_author,
self.publisher, self.isbn)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
class KentDistrictLibrary(MetadataSource): # {{{
name = 'Kent District Library'
metadata_type = 'social'
description = _('Downloads series information from ww2.kdl.org. '
'This website cannot handle large numbers of queries, '
'so the plugin is disabled by default.')
def fetch(self):
if not self.title or not self.book_author:
return
from calibre.ebooks.metadata.kdl import get_series
try:
self.results = get_series(self.title, self.book_author)
except Exception as e:
import traceback
traceback.print_exc()
self.exception = e
self.tb = traceback.format_exc()
# }}}
def result_index(source, result):
if not result.isbn:
return -1
for i, x in enumerate(source):
if x.isbn == result.isbn:
return i
return -1
def merge_results(one, two):
if two is not None and one is not None:
for x in two:
idx = result_index(one, x)
if idx < 0:
one.append(x)
else:
one[idx].smart_update(x)
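#For example, if one == [A] and two == [B, C], where A and B share an
#ISBN and C has a different one, merge_results leaves one == [A', C]
#with A' being A smart-updated with B's fields (illustrative names).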
class MetadataSources(object):
def __init__(self, sources):
self.sources = sources
def __enter__(self):
for s in self.sources:
s.__enter__()
return self
def __exit__(self, *args):
for s in self.sources:
s.__exit__()
def __call__(self, *args, **kwargs):
for s in self.sources:
s(*args, **kwargs)
def join(self):
for s in self.sources:
s.join()
def filter_metadata_results(item):
keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
for keyword in keywords:
if item.publisher and keyword in item.publisher.lower():
return False
return True
def do_cover_check(item):
item.has_cover = False
try:
item.has_cover = check_for_cover(item)
except:
pass # Cover not found
def check_for_covers(items):
threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
for t in threads: t.start()
for t in threads: t.join()
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
verbose=0):
assert not(title is None and author is None and publisher is None and \
isbn is None)
from calibre.customize.ui import metadata_sources, migrate_isbndb_key
migrate_isbndb_key()
if isbn is not None:
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
with MetadataSources(fetchers) as manager:
manager(title, author, publisher, isbn, verbose)
manager.join()
results = list(fetchers[0].results) if fetchers else []
for fetcher in fetchers[1:]:
merge_results(results, fetcher.results)
results = list(filter(filter_metadata_results, results))
check_for_covers(results)
words = ("the", "a", "an", "of", "and")
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')
def sort_func(x, y):
def cleanup_title(s):
if s is None:
s = _('Unknown')
s = s.strip().lower()
s = prefix_pat.sub(' ', s)
s = trailing_paren_pat.sub('', s)
s = whitespace_pat.sub(' ', s)
return s.strip()
t = cleanup_title(title)
x_title = cleanup_title(x.title)
y_title = cleanup_title(y.title)
# prefer titles that start with the search title
tx = cmp(t, x_title)
ty = cmp(t, y_title)
result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
# then prefer titles that have a cover image
if result == 0:
result = -cmp(x.has_cover, y.has_cover)
# then prefer titles with the longest comment, within 10%
if result == 0:
cx = len(x.comments.strip() if x.comments else '')
cy = len(y.comments.strip() if y.comments else '')
t = (cx + cy) / 20
result = cy - cx
if abs(result) < t:
result = 0
return result
results = sorted(results, cmp=sort_func)
# if for some reason there is no comment in the top selection, go looking for one
if len(results) > 1:
if not results[0].comments or len(results[0].comments) == 0:
for r in results[1:]:
try:
if title and title.lower() == r.title[:len(title)].lower() \
and r.comments and len(r.comments):
results[0].comments = r.comments
break
except:
pass
# Find a pubdate
pubdate = None
for r in results:
if r.pubdate is not None:
pubdate = r.pubdate
break
if pubdate is not None:
for r in results:
if r.pubdate is None:
r.pubdate = pubdate
def fix_case(x):
if x:
x = titlecase(x)
return x
for r in results:
r.title = fix_case(r.title)
if r.authors:
r.authors = list(map(fix_case, r.authors))
return results, [(x.name, x.exception, x.tb) for x in fetchers]
def get_social_metadata(mi, verbose=0):
from calibre.customize.ui import metadata_sources
fetchers = list(metadata_sources(metadata_type='social'))
with MetadataSources(fetchers) as manager:
manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
manager.join()
ratings, tags, comments, series, series_index = [], set([]), set([]), None, None
for fetcher in fetchers:
if fetcher.results:
dmi = fetcher.results
if dmi.rating is not None:
ratings.append(dmi.rating)
if dmi.tags:
for t in dmi.tags:
tags.add(t)
if mi.pubdate is None and dmi.pubdate is not None:
mi.pubdate = dmi.pubdate
if dmi.comments:
comments.add(dmi.comments)
if dmi.series is not None:
series = dmi.series
if dmi.series_index is not None:
series_index = dmi.series_index
if ratings:
rating = sum(ratings)/float(len(ratings))
if mi.rating is None or mi.rating < 0.1:
mi.rating = rating
else:
mi.rating = (mi.rating + rating)/2.0
if tags:
if not mi.tags:
mi.tags = []
mi.tags += list(tags)
mi.tags = list(sorted(list(set(mi.tags))))
if comments:
if not mi.comments or len(mi.comments)+20 < len(' '.join(comments)):
mi.comments = ''
for x in comments:
mi.comments += x+'\n\n'
if series and series_index is not None:
mi.series = series
mi.series_index = series_index
return [(x.name, x.exception, x.tb) for x in fetchers if x.exception is not
None]
def option_parser():
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
Fetch book metadata from online sources. You must specify at least one
of title, author, publisher or ISBN. If you specify ISBN, the others
are ignored.
'''
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-k', '--isbndb-key',
help=('The access key for your ISBNDB.com account. '
'Only needed if you want to search isbndb.com '
'and you haven\'t customized the IsbnDB plugin.'))
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
results, exceptions = search(opts.title, opts.author, opts.publisher,
opts.isbn, opts.isbndb_key, opts.verbose)
social_exceptions = []
for result in results:
social_exceptions.extend(get_social_metadata(result, opts.verbose))
prints(unicode(result))
print
for name, exception, tb in exceptions+social_exceptions:
if exception is not None:
print 'WARNING: Fetching from', name, 'failed with error:'
print exception
print tb
return 0
if __name__ == '__main__':
sys.exit(main())


@ -1,390 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
from urllib import urlencode
from lxml.html import soupparser, tostring
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
class Fictionwise(MetadataSource): # {{{
author = 'Sengian'
name = 'Fictionwise'
description = _('Downloads metadata from Fictionwise')
has_html_comments = True
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
class FictionwiseError(Exception):
pass
def report(verbose):
if verbose:
traceback.print_exc()
class Query(object):
BASE_URL = 'http://www.fictionwise.com/servlet/mw'
def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
assert not(title is None and author is None and publisher is None and keywords is None)
assert (max_results < 21)
self.max_results = int(max_results)
q = { 'template' : 'searchresults_adv.htm' ,
'searchtitle' : '',
'searchauthor' : '',
'searchpublisher' : '',
'searchkeyword' : '',
#possibilities startoflast, fullname, lastfirst
'searchauthortype' : 'startoflast',
'searchcategory' : '',
'searchcategory2' : '',
'searchprice_s' : '0',
'searchprice_e' : 'ANY',
'searchformat' : '',
'searchgeo' : 'US',
'searchfwdatetype' : '',
#maybe use dates fields if needed?
#'sortorder' : 'DESC',
#many options available: b.SortTitle, a.SortName,
#b.DateFirstPublished, b.FWPublishDate
'sortby' : 'b.SortTitle'
}
if title is not None:
q['searchtitle'] = title
if author is not None:
q['searchauthor'] = author
if publisher is not None:
q['searchpublisher'] = publisher
if keywords is not None:
q['searchkeyword'] = keywords
#urlencode chokes on unicode values, so encode each value to UTF-8 first
for k, v in q.iteritems():
if isinstance(v, unicode):
q[k] = v.encode('utf-8')
self.urldata = urlencode(q)
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print _('Query: %s') % (self.BASE_URL + self.urldata)
try:
raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
# get list of results as links
results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
results = results[:self.max_results]
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
#return feed if no links ie normally a single book or nothing
if not results:
results = [feed]
return results
class ResultList(list):
BASE_URL = 'http://www.fictionwise.com'
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
def __init__(self):
self.retitle = re.compile(r'\[[^\[\]]+\]')
self.rechkauth = re.compile(r'.*book\s*by', re.I)
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
self.resplitbr = re.compile(r'<br[^>]*>', re.I)
self.recomment = re.compile(r'(?s)<!--.*?-->')
self.reimg = re.compile(r'<img[^>]*>', re.I)
self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
def strip_tags_etree(self, etreeobj, invalid_tags):
for (itag, rmv) in invalid_tags.iteritems():
if rmv:
for elts in etreeobj.getiterator(itag):
elts.drop_tree()
else:
for elts in etreeobj.getiterator(itag):
elts.drop_tag()
def clean_entry(self, entry, invalid_tags = {'script': True},
invalid_id = (), invalid_class=(), invalid_xpath = ()):
#invalid_tags: remove tag and keep content if False else remove
#remove tags
if invalid_tags:
self.strip_tags_etree(entry, invalid_tags)
#remove xpath
if invalid_xpath:
for eltid in invalid_xpath:
elt = entry.xpath(eltid)
for el in elt:
el.drop_tree()
#remove id
if invalid_id:
for eltid in invalid_id:
elt = entry.get_element_by_id(eltid)
if elt is not None:
elt.drop_tree()
#remove class
if invalid_class:
for eltclass in invalid_class:
elts = entry.find_class(eltclass)
if elts is not None:
for elt in elts:
elt.drop_tree()
def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
out = tostring(entry, pretty_print=prettyout)
#work around tostring output: strip whitespace runs and numeric
#character references (e.g. &#38;) left in the serialized markup
reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
return reclean.sub('', out)
def get_title(self, entry):
title = entry.findtext('./')
return self.retitle.sub('', title).strip()
def get_authors(self, entry):
authortext = entry.find('./br').tail
if not self.rechkauth.search(authortext):
return []
authortext = self.rechkauth.sub('', authortext)
return [a.strip() for a in authortext.split('&')]
def get_rating(self, entrytable, verbose):
nbcomment = tostring(entrytable.getprevious())
try:
nbcomment = self.renbcom.search(nbcomment).group("nbcom")
except:
report(verbose)
return None
hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
float(image.get('height', default=0))) \
for image in entrytable.getiterator('img'))
#ratings as x/5
return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
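#For example, a 40px BLUE segment plus a 10px RED segment yields
# 1.25 * (4*40 + 1*10) / (40 + 10) = 4.25 stars out of 5.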
def get_description(self, entry):
description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
description = self.redesc.search(description)
if not description or not description.group("desc"):
return None
#remove invalid tags
description = self.reimg.sub('', description.group("desc"))
description = self.recomment.sub('', description)
description = self.resanitize.sub('', sanitize_comments_html(description))
return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)
def get_publisher(self, entry):
publisher = self.output_entry(entry.xpath('./p')[1])
publisher = filter(lambda x: self.repub.search(x) is not None,
self.resplitbr.split(publisher))
if not len(publisher):
return None
publisher = self.repub.sub('', publisher[0])
return publisher.split(',')[0].strip()
def get_tags(self, entry):
tag = self.output_entry(entry.xpath('./p')[1])
tag = filter(lambda x: self.retag.search(x) is not None,
self.resplitbr.split(tag))
if not len(tag):
return []
return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
def get_date(self, entry, verbose):
date = self.output_entry(entry.xpath('./p')[1])
date = filter(lambda x: self.redate.search(x) is not None,
self.resplitbr.split(date))
if not len(date):
return None
try:
d = self.redate.sub('', date[0])
if d:
default = utcnow().replace(day=15)
d = parse_date(d, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def get_ISBN(self, entry):
isbns = self.output_entry(entry.xpath('./p')[2])
isbns = filter(lambda x: self.reisbn.search(x) is not None,
self.resplitbrdiv.split(isbns))
if not len(isbns):
return None
isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
def fill_MI(self, entry, title, authors, ratings, verbose):
mi = MetaInformation(title, authors)
mi.rating = ratings
mi.comments = self.get_description(entry)
mi.publisher = self.get_publisher(entry)
mi.tags = self.get_tags(entry)
mi.pubdate = self.get_date(entry, verbose)
mi.isbn = self.get_ISBN(entry)
mi.author_sort = authors_to_sort_string(authors)
return mi
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
raise FictionwiseError(_('Fictionwise encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
return soupparser.fromstring(raw)
except:
try:
#remove ASCII invalid chars
return soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
def populate(self, entries, browser, verbose=False):
inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
'ul': False, 'span': False}
inv_xpath =('./table',)
#single entry
if len(entries) == 1 and not isinstance(entries[0], str):
try:
entry = entries[0].xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
title = self.get_title(entry)
#maybe strengthen the search
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print _('Failed to get all details for an entry')
print e
return
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
else:
#multiple entries
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
title = self.get_title(entry)
#maybe strengthen the search
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print _('Failed to get all details for an entry')
print e
continue
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=5,
keywords=None):
br = browser()
entries = Query(title=title, author=author, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
#List of entries
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
Fetch book metadata from Fictionwise. You must specify one of title, author,
or keywords. Searching by ISBN is not supported. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
''')
))
parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-m', '--max-results', default=20,
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, publisher=opts.publisher,
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
sys.exit(main())


@ -1,247 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, textwrap
from urllib import urlencode
from functools import partial
from lxml import etree
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
def report(verbose):
if verbose:
import traceback
traceback.print_exc()
class Query(object):
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
def __init__(self, title=None, author=None, publisher=None, isbn=None,
max_results=20, min_viewability='none', start_index=1):
assert not(title is None and author is None and publisher is None and \
isbn is None)
assert (max_results < 21)
assert (min_viewability in ('none', 'partial', 'full'))
q = ''
if isbn is not None:
q += 'isbn:'+isbn
else:
def build_term(prefix, parts):
return ' '.join('in'+prefix + ':' + x for x in parts)
if title is not None:
q += build_term('title', title.split())
if author is not None:
q += ('+' if q else '')+build_term('author', author.split())
if publisher is not None:
q += ('+' if q else '')+build_term('publisher', publisher.split())
if isinstance(q, unicode):
q = q.encode('utf-8')
self.url = self.BASE_URL+urlencode({
'q':q,
'max-results':max_results,
'start-index':start_index,
'min-viewability':min_viewability,
})
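#For example, Query(title='Foundation', author='Asimov') builds the
#query string 'intitle:Foundation+inauthor:Asimov' before URL-encoding.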
def __call__(self, browser, verbose):
if verbose:
print 'Query:', self.url
feed = etree.fromstring(browser.open(self.url).read())
#print etree.tostring(feed, pretty_print=True)
total = int(total_results(feed)[0].text)
start = int(start_index(feed)[0].text)
entries = entry(feed)
new_start = start + len(entries)
if new_start > total:
new_start = 0
return entries, new_start
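# A new_start of 0 signals the caller that no pages remain; search()
# below keeps fetching while its start value stays > 0.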
class ResultList(list):
def get_description(self, entry, verbose):
try:
desc = description(entry)
if desc:
return 'SUMMARY:\n'+desc[0].text
except:
report(verbose)
def get_language(self, entry, verbose):
try:
l = language(entry)
if l:
return l[0].text
except:
report(verbose)
def get_title(self, entry):
candidates = [x.text for x in title(entry)]
return ': '.join(candidates)
def get_authors(self, entry):
m = creator(entry)
if not m:
m = []
m = [x.text for x in m]
return m
def get_author_sort(self, entry, verbose):
for x in creator(entry):
for key, val in x.attrib.items():
if key.endswith('file-as'):
return val
def get_identifiers(self, entry, mi):
isbns = []
for x in identifier(entry):
t = str(x.text).strip()
if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
if t[:5].upper() == 'ISBN:':
isbns.append(t[5:])
if isbns:
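# prefer the longest identifier, so an ISBN-13 wins over an ISBN-10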
mi.isbn = sorted(isbns, key=len)[-1]
def get_tags(self, entry, verbose):
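# Subjects can arrive as slash-separated paths such as
# 'Fiction / Science Fiction / General'; split them into flat tags.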
try:
btags = [x.text for x in subject(entry)]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = sorted(set(tags))
except:
report(verbose)
tags = []
return [x.replace(',', ';') for x in tags]
def get_publisher(self, entry, verbose):
try:
pub = publisher(entry)[0].text
except:
pub = None
return pub
def get_date(self, entry, verbose):
try:
d = date(entry)
if d:
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:
report(verbose)
d = None
return d
def populate(self, entries, browser, verbose=False):
for x in entries:
try:
id_url = entry_id(x)[0].text
title = self.get_title(x)
except:
report(verbose)
continue
mi = MetaInformation(title, self.get_authors(x))
try:
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
mi.author_sort = self.get_author_sort(x, verbose)
mi.comments = self.get_description(x, verbose)
self.get_identifiers(x, mi)
mi.tags = self.get_tags(x, verbose)
mi.publisher = self.get_publisher(x, verbose)
mi.pubdate = self.get_date(x, verbose)
mi.language = self.get_language(x, verbose)
self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
min_viewability='none', verbose=False, max_results=40):
br = browser()
br.set_handle_gzip(True)
start, entries = 1, []
while start > 0 and len(entries) <= max_results:
new, start = Query(title=title, author=author, publisher=publisher,
isbn=isbn, min_viewability=min_viewability, start_index=start)(br, verbose)
if not new:
break
entries.extend(new)
entries = entries[:max_results]
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def option_parser():
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
Fetch book metadata from Google. You must specify one of title, author,
publisher or ISBN. If you specify an ISBN, the others are ignored. Will
fetch a maximum of 100 matches, so you should make your query as
specific as possible.
'''
))
parser.add_option('-t', '--title', help='Book title')
parser.add_option('-a', '--author', help='Book author(s)')
parser.add_option('-p', '--publisher', help='Book publisher')
parser.add_option('-i', '--isbn', help='Book ISBN')
parser.add_option('-m', '--max-results', default=10,
help='Maximum number of results to fetch')
parser.add_option('-v', '--verbose', default=0, action='count',
help='Be more verbose about errors')
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, opts.publisher, opts.isbn,
verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
print
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,159 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Interface to isbndb.com. My key HLLXQX2A.
'''
import sys, re
from urllib import quote
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'
class ISBNDBError(Exception):
pass
def fetch_metadata(url, max=3, timeout=5.):
books = []
page_number = 1
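# placeholder so the loop below runs at least once; overwritten by the
# real total parsed from the first response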
total_results = 31
br = browser()
while len(books) < total_results and max > 0:
try:
raw = br.open(url, timeout=timeout).read()
except Exception as err:
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
soup = BeautifulStoneSoup(raw,
convertEntities=BeautifulStoneSoup.XML_ENTITIES)
book_list = soup.find('booklist')
if book_list is None:
errmsg = soup.find('errormessage').string
raise ISBNDBError('Error fetching metadata: '+errmsg)
total_results = int(book_list['total_results'])
page_number += 1
np = '&page_number=%s&'%page_number
url = re.sub(r'\&page_number=\d+\&', np, url)
books.extend(book_list.findAll('bookdata'))
max -= 1
return books
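# Usage sketch (hypothetical access key; build_combined is defined below):
# url = build_combined(BASE_URL % dict(key='YOUR_KEY'), opts)
# books = fetch_metadata(url)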
class ISBNDBMetadata(Metadata):
def __init__(self, book):
Metadata.__init__(self, None)
def tostring(e):
if not hasattr(e, 'string'):
return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
title = tostring(book.find('titlelong'))
if not title:
title = tostring(book.find('title'))
self.title = title
authors = []
au = tostring(book.find('authorstext'))
if au:
au = au.strip()
temp = au.split(',')
for au in temp:
if not au: continue
authors.extend([a.strip() for a in au.split('&amp;')])
if authors:
self.authors = authors
try:
self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
pass
self.publisher = tostring(book.find('publishertext'))
summ = tostring(book.find('summary'))
if summ:
self.comments = 'SUMMARY:\n'+summ
def build_isbn(base_url, opts):
return base_url + 'index1=isbn&value1='+opts.isbn
def build_combined(base_url, opts):
query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
if e is not None ])
query = query.strip()
if len(query) == 0:
raise ISBNDBError('You must specify at least one of --author, --title or --publisher')
query = re.sub(r'\s+', '+', query)
if isinstance(query, unicode):
query = query.encode('utf-8')
return base_url+'index1=combined&value1='+quote(query, '+')
def option_parser():
parser = OptionParser(usage=\
_('''
%prog [options] key
Fetch metadata for books from isbndb.com. You can specify either the
book's ISBN or its title and author. If you specify the title and author,
then more than one book may be returned.
key is the account key you generate after signing up for a free account at isbndb.com.
'''))
parser.add_option('-i', '--isbn', default=None, dest='isbn',
help=_('The ISBN ID of the book you want metadata for.'))
parser.add_option('-a', '--author', dest='author',
default=None, help=_('The author whose book to search for.'))
parser.add_option('-t', '--title', dest='title',
default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.'))
parser.add_option('-v', '--verbose', default=False,
action='store_true', help=_('Verbose processing'))
return parser
def create_books(opts, args, timeout=5.):
base_url = BASE_URL%dict(key=args[1])
if opts.isbn is not None:
url = build_isbn(base_url, opts)
else:
url = build_combined(base_url, opts)
if opts.verbose:
print ('ISBNDB query: '+url)
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
# remove duplicate ISBNs
return list(dict((book.isbn, book) for book in tans).values())
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print ('You must supply the isbndb.com key')
return 1
for book in create_books(opts, args):
print unicode(book).encode('utf-8')
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -400,7 +400,8 @@ class MetadataUpdater(object):
if getattr(self, 'exth', None) is None:
raise MobiError('No existing EXTH record. Cannot update metadata.')
self.record0[92:96] = iana2mobi(mi.language)
if not mi.is_null('language'):
self.record0[92:96] = iana2mobi(mi.language)
self.create_exth(exth=exth, new_title=mi.title)
# Fetch updated timestamp, cover_record, thumbnail_record

View File

@ -1,411 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'
import sys, textwrap, re, traceback, socket
from urllib import urlencode
from math import ceil
from copy import deepcopy
from lxml.html import soupparser
from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata.covers import CoverDownload
from calibre.utils.config import OptionParser
class NiceBooks(MetadataSource):
name = 'Nicebooks'
description = _('Downloads metadata from French Nicebooks')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
version = (1, 0, 0)
def fetch(self):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
class NiceBooksCovers(CoverDownload):
name = 'Nicebooks covers'
description = _('Downloads covers from French Nicebooks')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Sengian'
type = _('Cover download')
version = (1, 0, 0)
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn:
return False
br = browser()
try:
entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
if Covers(mi.isbn)(entry).check_cover():
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn:
return
br = browser()
try:
entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
cover_data, ext = Covers(mi.isbn)(entry).get_cover(br, timeout)
if not ext:
ext = 'jpg'
result_queue.put((True, cover_data, ext, self.name))
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
class NiceBooksError(Exception):
pass
class ISBNNotFound(NiceBooksError):
pass
def report(verbose):
if verbose:
traceback.print_exc()
class Query(object):
BASE_URL = 'http://fr.nicebooks.com/'
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None, max_results=20):
assert not(title is None and author is None and publisher is None \
and isbn is None and keywords is None)
assert (max_results < 21)
self.max_results = int(max_results)
if isbn is not None:
q = isbn
else:
q = ' '.join([i for i in (title, author, publisher, keywords) \
if i is not None])
if isinstance(q, unicode):
q = q.encode('utf-8')
self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
print _('Query: %s') % (self.BASE_URL + self.urldata)
try:
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
# remove invalid ASCII chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
# number of pages to fetch
try:
nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
except:
#direct hit
return [feed]
nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
pages = [feed]
if nbpagetoquery > 1:
for i in xrange(2, nbpagetoquery + 1):
try:
urldata = self.urldata + '&p=' + str(i)
raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
except Exception as e:
continue
if '<title>404 - ' in raw:
continue
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
# remove invalid ASCII chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
continue
pages.append(feed)
results = []
for x in pages:
results.extend([i.find_class('title')[0].get('href') \
for i in x.xpath("//ul[@id='results']/li")])
return results[:self.max_results]
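# e.g. 35 hits with max_results == 20 gives nbpagetoquery == 2: the
# first page plus one more, before truncating to max_results links.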
class ResultList(list):
BASE_URL = 'http://fr.nicebooks.com'
def __init__(self):
self.repub = re.compile(u'\s*.diteur\s*', re.I)
self.reauteur = re.compile(u'\s*auteur.*', re.I)
self.reautclean = re.compile(u'\s*\(.*\)\s*')
def get_title(self, entry):
title = deepcopy(entry)
title.remove(title.find("dl[@title='Informations sur le livre']"))
title = ' '.join([i.text_content() for i in title.iterchildren()])
return unicode(title.replace('\n', ''))
def get_authors(self, entry):
author = entry.find("dl[@title='Informations sur le livre']")
authortext = []
for x in author.getiterator('dt'):
if self.reauteur.match(x.text):
elt = x.getnext()
while elt.tag == 'dd':
authortext.append(unicode(elt.text_content()))
elt = elt.getnext()
break
if len(authortext) == 1:
authortext = [self.reautclean.sub('', authortext[0])]
return authortext
def get_description(self, entry, verbose):
try:
return u'RESUME:\n' + unicode(entry.getparent().xpath("//p[@id='book-description']")[0].text)
except:
report(verbose)
return None
def get_book_info(self, entry, mi, verbose):
entry = entry.find("dl[@title='Informations sur le livre']")
for x in entry.getiterator('dt'):
if x.text == 'ISBN':
isbntext = x.getnext().text_content().replace('-', '')
if check_isbn(isbntext):
mi.isbn = unicode(isbntext)
elif self.repub.match(x.text):
mi.publisher = unicode(x.getnext().text_content())
elif x.text == 'Langue':
mi.language = unicode(x.getnext().text_content())
elif x.text == 'Date de parution':
d = x.getnext().text_content()
try:
default = utcnow().replace(day=15)
d = replace_months(d, 'fr')
d = parse_date(d, assume_utc=True, default=default)
mi.pubdate = d
except:
report(verbose)
return mi
def fill_MI(self, entry, title, authors, verbose):
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
return self.get_book_info(entry, mi, verbose)
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
raise NiceBooksError(_('Nicebooks encountered an error.'))
if '<title>404 - ' in raw:
report(verbose)
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
feed = soupparser.fromstring(raw)
except:
try:
# remove invalid ASCII chars
feed = soupparser.fromstring(clean_ascii_chars(raw))
except:
return None
# get results
return feed.xpath("//div[@id='container']")[0]
def populate(self, entries, browser, verbose=False):
#single entry
if len(entries) == 1 and not isinstance(entries[0], str):
try:
entry = entries[0].xpath("//div[@id='container']")[0]
entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
return
self.append(self.fill_MI(entry, title, authors, verbose))
else:
#multiple entries
for x in entries:
try:
entry = self.get_individual_metadata(browser, x, verbose)
entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
continue
self.append(self.fill_MI(entry, title, authors, verbose))
class Covers(object):
def __init__(self, isbn = None):
assert isbn is not None
self.urlimg = ''
self.isbn = isbn
self.isbnf = False
def __call__(self, entry = None):
try:
self.urlimg = entry.xpath("//div[@id='book-picture']/a")[0].get('href')
except:
return self
isbno = entry.get_element_by_id('book-info').find("dl[@title='Informations sur le livre']")
for x in isbno.getiterator('dt'):
if x.text == 'ISBN' and check_isbn(x.getnext().text_content()):
self.isbnf = True
break
return self
def check_cover(self):
return bool(self.urlimg)
def get_cover(self, browser, timeout = 5.):
try:
cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \
self.urlimg.rpartition('.')[-1]
return cover, ext if ext else 'jpg'
except Exception as err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
if not len(self.urlimg):
if not self.isbnf:
raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
raise NiceBooksError(_('An error occurred with the Nicebooks cover fetcher'))
def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None):
br = browser()
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
if entries is None or len(entries) == 0:
return None
# list of entries
ans = ResultList()
ans.populate(entries, br, verbose)
return ans
def check_for_cover(isbn):
br = browser()
entry = Query(isbn=isbn, max_results=1)(br, False)[0]
return Covers(isbn)(entry).check_cover()
def cover_from_isbn(isbn, timeout = 5.):
br = browser()
entry = Query(isbn=isbn, max_results=1)(br, False, timeout)[0]
return Covers(isbn)(entry).get_cover(br, timeout)
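# Usage sketch (hypothetical ISBN):
# cover_data, ext = cover_from_isbn('9782070360024')
# open('cover.' + ext, 'wb').write(cover_data)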
def option_parser():
parser = OptionParser(textwrap.dedent(\
_('''\
%prog [options]
Fetch book metadata from Nicebooks. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
It can also download covers if the covers option is set.
''')
))
parser.add_option('-t', '--title', help=_('Book title'))
parser.add_option('-a', '--author', help=_('Book author(s)'))
parser.add_option('-p', '--publisher', help=_('Book publisher'))
parser.add_option('-i', '--isbn', help=_('Book ISBN'))
parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-c', '--covers', default=0,
help=_('Covers: 1 = check, 2 = download'))
parser.add_option('--coverspath', default='',
help=_('Directory to save covers to'))
parser.add_option('-m', '--max-results', default=20,
help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
import os
parser = option_parser()
opts, args = parser.parse_args(args)
try:
results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
except AssertionError:
report(True)
parser.print_help()
return 1
if results is None or len(results) == 0:
print _('No results found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
covact = int(opts.covers)
if covact == 1:
textcover = _('No cover found!')
if check_for_cover(result.isbn):
textcover = _('A cover was found for this book')
print textcover
elif covact == 2:
cover_data, ext = cover_from_isbn(result.isbn)
cpath = result.isbn
if len(opts.coverspath):
cpath = os.path.join(opts.coverspath, result.isbn)
oname = os.path.abspath(cpath+'.'+ext)
open(oname, 'wb').write(cover_data)
print _('Cover saved to file'), oname
print
if __name__ == '__main__':
sys.exit(main())

View File

@ -966,7 +966,9 @@ class OPF(object): # {{{
cover_id = covers[0].get('content')
for item in self.itermanifest():
if item.get('id', None) == cover_id:
return item.get('href', None)
mt = item.get('media-type', '')
if 'xml' not in mt:
return item.get('href', None)
@dynamic_property
def cover(self):

View File

@ -301,7 +301,7 @@ class Amazon(Source):
if asin is None:
asin = identifiers.get('asin', None)
if asin:
return 'http://amzn.com/%s'%asin
return ('amazon', asin, 'http://amzn.com/%s'%asin)
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{

View File

@ -56,7 +56,8 @@ class InternalMetadataCompareKeyGen(object):
'''
Generate a sort key for comparison of the relevance of Metadata objects,
given a search query.
given a search query. This is used only to compare results from the same
metadata source, not across different sources.
The sort key ensures that an ascending order sort is a sort by order of
decreasing relevance.
@ -306,7 +307,7 @@ class Source(Plugin):
title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
[
# Remove things like: (2010) (Omnibus) etc.
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|turtleback|mass\s*market|edition|ed\.)[\])}]', ''),
# Remove any strings that contain the substring edition inside
# parentheses
(r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),
@ -374,7 +375,11 @@ class Source(Plugin):
def get_book_url(self, identifiers):
'''
Return the URL for the book identified by identifiers at this source.
Return a 3-tuple or None. The 3-tuple is of the form:
(identifier_type, identifier_value, URL).
The URL is the URL for the book identified by identifiers at this
source. identifier_type, identifier_value specify the identifier
corresponding to the URL.
This URL must be browsable by a human using a browser. It is meant
to provide a clickable link for the user to easily visit the book's page
at this source.
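# A sketch of the new contract (hypothetical source and URL):
#
# def get_book_url(self, identifiers):
# isbn = identifiers.get('isbn', None)
# if isbn:
# return ('isbn', isbn, 'http://example.com/book/' + isbn)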

View File

@ -19,13 +19,8 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.metadata.sources.base import create_log
from calibre.ebooks.metadata.sources.identify import identify
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.utils.config import test_eight_code
def option_parser():
if not test_eight_code:
from calibre.ebooks.metadata.fetch import option_parser
return option_parser()
parser = OptionParser(textwrap.dedent(
'''\
%prog [options]
@ -48,9 +43,6 @@ def option_parser():
return parser
def main(args=sys.argv):
if not test_eight_code:
from calibre.ebooks.metadata.fetch import main
return main(args)
parser = option_parser()
opts, args = parser.parse_args(args)

View File

@ -173,7 +173,7 @@ class GoogleBooks(Source):
def get_book_url(self, identifiers): # {{{
goog = identifiers.get('google', None)
if goog is not None:
return 'http://books.google.com/books?id=%s'%goog
return ('google', goog, 'http://books.google.com/books?id=%s'%goog)
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{

View File

@ -13,6 +13,7 @@ from Queue import Queue, Empty
from threading import Thread
from io import BytesIO
from operator import attrgetter
from urlparse import urlparse
from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs
@ -400,6 +401,9 @@ def identify(log, abort, # {{{
and plugin.get_cached_cover_url(result.identifiers) is not
None)
result.identify_plugin = plugin
if msprefs['txt_comments']:
if plugin.has_html_comments and result.comments:
result.comments = html2text(result.comments)
log('The identify phase took %.2f seconds'%(time.time() - start_time))
log('The longest time (%f) was taken by:'%longest, lp)
@ -410,10 +414,6 @@ def identify(log, abort, # {{{
log('We have %d merged results, merging took: %.2f seconds' %
(len(results), time.time() - start_time))
if msprefs['txt_comments']:
for r in results:
if r.identify_plugin.has_html_comments and r.comments:
r.comments = html2text(r.comments)
max_tags = msprefs['max_tags']
for r in results:
@ -435,18 +435,38 @@ def identify(log, abort, # {{{
# }}}
def urls_from_identifiers(identifiers): # {{{
identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
ans = []
for plugin in all_metadata_plugins():
try:
url = plugin.get_book_url(identifiers)
if url is not None:
ans.append((plugin.name, url))
id_type, id_val, url = plugin.get_book_url(identifiers)
ans.append((plugin.name, id_type, id_val, url))
except:
pass
isbn = identifiers.get('isbn', None)
if isbn:
ans.append((isbn,
'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
ans.append((isbn, 'isbn', isbn,
'http://www.worldcat.org/isbn/'+isbn))
doi = identifiers.get('doi', None)
if doi:
ans.append(('DOI', 'doi', doi,
'http://dx.doi.org/'+doi))
arxiv = identifiers.get('arxiv', None)
if arxiv:
ans.append(('arXiv', 'arxiv', arxiv,
'http://arxiv.org/abs/'+arxiv))
oclc = identifiers.get('oclc', None)
if oclc:
ans.append(('OCLC', 'oclc', oclc,
'http://www.worldcat.org/oclc/'+oclc))
url = identifiers.get('uri', None)
if url is None:
url = identifiers.get('url', None)
if url and url.startswith('http'):
url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
parts = urlparse(url)
name = parts.netloc
ans.append((name, 'url', url, url))
return ans
# }}}
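# Sketch of the '|' un-escaping above (hypothetical value): ':' and ','
# cannot appear in stored identifiers, so they are saved as '|';
# 'http|//example.com/a|b' is restored to 'http://example.com/a,b'.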

View File

@ -81,7 +81,7 @@ class ISBNDB(Source):
author_tokens = self.get_author_tokens(authors,
only_first_author=True)
tokens += author_tokens
tokens = [quote(t) for t in tokens]
tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t) for t in tokens]
q = '+'.join(tokens)
q = 'index1=combined&value1='+q
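# e.g. quote(u'caf\xe9'.encode('utf-8')) -> 'caf%C3%A9'; without the
# encode, quote() would raise on non-ASCII unicode tokens.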

View File

@ -41,7 +41,7 @@ class OverDrive(Source):
cached_cover_url_is_reliable = True
options = (
Option('get_full_metadata', 'bool', False,
Option('get_full_metadata', 'bool', True,
_('Download all metadata (slow)'),
_('Enable this option to gather all metadata available from Overdrive.')),
)
@ -265,7 +265,7 @@ class OverDrive(Source):
if creators:
creators = creators.split(', ')
# if an exact match in a preferred format occurs
if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
if ((author and creators and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
return self.format_results(reserveid, od_title, subtitle, series, publisher,
creators, thumbimage, worldcatlink, formatid)
else:
@ -291,7 +291,7 @@ class OverDrive(Source):
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
else:
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))

View File

@ -222,7 +222,7 @@ class SaveWorker(Thread):
if isbytestring(fpath):
fpath = fpath.decode(filesystem_encoding)
formats[fmt.lower()] = fpath
data[i] = [opf, cpath, formats]
data[i] = [opf, cpath, formats, mi.last_modified.isoformat()]
return data
def run(self):

View File

@ -253,6 +253,8 @@ class MobiReader(object):
.italic { font-style: italic }
.underline { text-decoration: underline }
.mbp_pagebreak {
page-break-after: always; margin: 0; display: block
}
@ -601,6 +603,9 @@ class MobiReader(object):
elif tag.tag == 'i':
tag.tag = 'span'
tag.attrib['class'] = 'italic'
elif tag.tag == 'u':
tag.tag = 'span'
tag.attrib['class'] = 'underline'
elif tag.tag == 'b':
tag.tag = 'span'
tag.attrib['class'] = 'bold'

View File

@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
Convert an ODT file into an Open Ebook
'''
import os
from lxml import etree
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
@ -23,7 +25,51 @@ class Extract(ODF2XHTML):
with open(name, 'wb') as f:
f.write(data)
def __call__(self, stream, odir):
def filter_css(self, html, log):
root = etree.fromstring(html)
style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
if style:
style = style[0]
css = style.text
if css:
style.text, sel_map = self.do_filter_css(css)
for x in root.xpath('//*[@class]'):
extra = []
orig = x.get('class')
for cls in orig.split():
extra.extend(sel_map.get(cls, []))
if extra:
x.set('class', orig + ' ' + ' '.join(extra))
html = etree.tostring(root, encoding='utf-8',
xml_declaration=True)
return html
def do_filter_css(self, css):
from cssutils import parseString
from cssutils.css import CSSRule
sheet = parseString(css)
rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
sel_map = {}
count = 0
for r in rules:
# Check if we have only class selectors for this rule
nc = [x for x in r.selectorList if not
x.selectorText.startswith('.')]
if len(r.selectorList) > 1 and not nc:
# Replace all the class selectors with a single class selector
# This will be added to the class attribute of all elements
# that have one of these selectors.
replace_name = 'c_odt%d'%count
count += 1
for sel in r.selectorList:
s = sel.selectorText[1:]
if s not in sel_map:
sel_map[s] = []
sel_map[s].append(replace_name)
r.selectorText = '.'+replace_name
return sheet.cssText, sel_map
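# e.g. (sketch) '.a, .b { font-weight: bold }' becomes
# '.c_odt0 { font-weight: bold }' with sel_map == {'a': ['c_odt0'],
# 'b': ['c_odt0']}; filter_css() above then rewrites class="a" to
# class="a c_odt0" on matching elements.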
def __call__(self, stream, odir, log):
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
@ -32,13 +78,17 @@ class Extract(ODF2XHTML):
if not os.path.exists(odir):
os.makedirs(odir)
with CurrentDir(odir):
print 'Extracting ODT file...'
log('Extracting ODT file...')
html = self.odf2xhtml(stream)
# A blanket img specification like this causes problems
# with EPUB output as the contaiing element often has
# with EPUB output as the containing element often has
# an absolute height and width set that is larger than
# the available screen real estate
html = html.replace('img { width: 100%; height: 100%; }', '')
try:
html = self.filter_css(html, log)
except:
log.exception('Failed to filter CSS, conversion may be slow')
with open('index.xhtml', 'wb') as f:
f.write(html.encode('utf-8'))
zf = ZipFile(stream, 'r')
@ -67,7 +117,7 @@ class ODTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
return Extract()(stream, '.')
return Extract()(stream, '.', log)
def postprocess_book(self, oeb, opts, log):
# Fix <p><div> constructs as the asinine epubchecker complains

View File

@ -16,7 +16,7 @@ from urllib import unquote as urlunquote
from lxml import etree, html
from calibre.constants import filesystem_encoding, __version__
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
from calibre import isbytestring, as_unicode, get_types_map
@ -446,22 +446,23 @@ class NullContainer(object):
class DirContainer(object):
"""Filesystem directory container."""
def __init__(self, path, log):
def __init__(self, path, log, ignore_opf=False):
self.log = log
if isbytestring(path):
path = path.decode(filesystem_encoding)
self.opfname = None
ext = os.path.splitext(path)[1].lower()
if ext == '.opf':
self.opfname = os.path.basename(path)
self.rootdir = os.path.dirname(path)
return
self.rootdir = path
for path in self.namelist():
ext = os.path.splitext(path)[1].lower()
if ext == '.opf':
self.opfname = path
return
self.opfname = None
if not ignore_opf:
for path in self.namelist():
ext = os.path.splitext(path)[1].lower()
if ext == '.opf':
self.opfname = path
return
def read(self, path):
if path is None:
@ -852,6 +853,7 @@ class Manifest(object):
self.oeb.log.debug('Parsing', self.href, '...')
# Convert to Unicode and normalize line endings
data = self.oeb.decode(data)
data = strip_encoding_declarations(data)
data = self.oeb.html_preprocessor(data)
# There could be null bytes in data if it had &#0; entities in it
data = data.replace('\0', '')
@ -1047,8 +1049,8 @@ class Manifest(object):
# Remove hyperlinks with no content as they cause rendering
# artifacts in browser based renderers
# Also remove empty <b> and <i> tags
for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
# Also remove empty <b>, <u> and <i> tags
for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
if a.get('id', None) is None and a.get('name', None) is None \
and len(a) == 0 and not a.text:
remove_elem(a)

View File

@ -125,7 +125,19 @@ class Stylizer(object):
def __init__(self, tree, path, oeb, opts, profile=None,
extra_css='', user_css=''):
self.oeb, self.opts = oeb, opts
self.profile = opts.input_profile
self.profile = profile
if self.profile is None:
# Use the default profile. This should really be using
# opts.output_profile, but I don't want to risk changing it, as
# doing so might well have hard to debug font size effects.
from calibre.customize.ui import output_profiles
for x in output_profiles():
if x.short_name == 'default':
self.profile = x
break
if self.profile is None:
# Just in case the default profile is removed in the future :)
self.profile = opts.output_profile
self.logger = oeb.logger
item = oeb.manifest.hrefs[path]
basename = os.path.basename(path)

View File

@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
m.clear('description')
m.add('description', mi.comments)
elif override_input_metadata:
m.clear('description')
m.clear('description')
if not mi.is_null('publisher'):
m.clear('publisher')
m.add('publisher', mi.publisher)

View File

@ -16,6 +16,7 @@ from calibre import CurrentDir
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ptempfile import TemporaryFile
from calibre.utils.magick import Image, create_canvas
from calibre.ebooks.compression.palmdoc import decompress_doc
DATATYPE_PHTML = 0
DATATYPE_PHTML_COMPRESSED = 1
@ -359,7 +360,7 @@ class Reader(FormatReader):
# plugin assemble the order based on hyperlinks.
with CurrentDir(output_dir):
for uid, num in self.uid_text_secion_number.items():
self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
with open('%s.html' % uid, 'wb') as htmlf:
html = u'<html><body>'
section_header, section_data = self.sections[num]
@ -465,7 +466,7 @@ class Reader(FormatReader):
if not home_html:
home_html = self.uid_text_secion_number.items()[0][0]
except:
raise Exception(_('Could not determine home.html'))
raise Exception('Could not determine home.html')
# Generate oeb from html conversion.
oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
self.options.debug_pipeline = odi

View File

@ -32,10 +32,11 @@ class PDFInput(InputFormatPlugin):
def convert_new(self, stream, accelerators):
from calibre.ebooks.pdf.reflow import PDFDocument
from calibre.utils.cleantext import clean_ascii_chars
if pdfreflow_err:
raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
pdfreflow.reflow(stream.read(), 1, -1)
xml = open('index.xml', 'rb').read()
xml = clean_ascii_chars(open('index.xml', 'rb').read())
PDFDocument(xml, self.opts, self.log)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -15,7 +15,6 @@ import cStringIO
from lxml import etree
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.filenames import ascii_text
from calibre.utils.magick.draw import save_cover_data_to, identify_data
TAGS = {
@ -79,8 +78,7 @@ def txt2rtf(text):
elif val <= 127:
buf.write(x)
else:
repl = ascii_text(x)
c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl))
c = r'\u{0:d}?'.format(val)
buf.write(c)
return buf.getvalue()

View File

@ -34,7 +34,7 @@ if isosx:
)
gprefs.defaults['action-layout-toolbar'] = (
'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
'Connect Share', None, 'Remove Books',
)
gprefs.defaults['action-layout-toolbar-device'] = (
@ -48,7 +48,7 @@ else:
gprefs.defaults['action-layout-menubar-device'] = ()
gprefs.defaults['action-layout-toolbar'] = (
'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
'Connect Share', None, 'Remove Books', None, 'Help', 'Preferences',
)
gprefs.defaults['action-layout-toolbar-device'] = (
@ -739,12 +739,6 @@ def build_forms(srcdir, info=None):
dat = dat.replace('from QtWebKit.QWebView import QWebView',
'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')
if form.endswith('viewer%smain.ui'%os.sep):
info('\t\tPromoting WebView')
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
open(compiled_form, 'wb').write(dat)
_df = os.environ.get('CALIBRE_DEVELOP_FROM', None)

View File

@ -20,9 +20,8 @@ from calibre.ebooks import BOOK_EXTENSIONS
from calibre.utils.filenames import ascii_filename
from calibre.constants import preferred_encoding, filesystem_encoding
from calibre.gui2.actions import InterfaceAction
from calibre.gui2 import config, question_dialog
from calibre.gui2 import question_dialog
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import test_eight_code
from calibre.ebooks.metadata.sources.base import msprefs
def get_filters():
@ -180,26 +179,17 @@ class AddAction(InterfaceAction):
except IndexError:
self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
self.isbn_add_dialog.accept()
if test_eight_code:
orig = msprefs['ignore_fields']
new = list(orig)
for x in ('title', 'authors'):
if x in new:
new.remove(x)
msprefs['ignore_fields'] = new
try:
self.gui.iactions['Edit Metadata'].download_metadata(
ids=self.add_by_isbn_ids)
finally:
msprefs['ignore_fields'] = orig
else:
orig = config['overwrite_author_title_metadata']
config['overwrite_author_title_metadata'] = True
try:
self.gui.iactions['Edit Metadata'].do_download_metadata(
self.add_by_isbn_ids)
finally:
config['overwrite_author_title_metadata'] = orig
orig = msprefs['ignore_fields']
new = list(orig)
for x in ('title', 'authors'):
if x in new:
new.remove(x)
msprefs['ignore_fields'] = new
try:
self.gui.iactions['Edit Metadata'].download_metadata(
ids=self.add_by_isbn_ids)
finally:
msprefs['ignore_fields'] = orig
return

View File

@ -246,7 +246,8 @@ class ChooseLibraryAction(InterfaceAction):
def delete_requested(self, name, location):
loc = location.replace('/', os.sep)
if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
_('All files from %s will be '
_('<b style="color: red">All files</b> (not just ebooks) '
'from <br><br><b>%s</b><br><br> will be '
'<b>permanently deleted</b>. Are you sure?') % loc,
show_copy_button=False):
return

View File

@ -10,15 +10,13 @@ from functools import partial
from PyQt4.Qt import Qt, QMenu, QModelIndex, QTimer
from calibre.gui2 import error_dialog, config, Dispatcher, question_dialog
from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
from calibre.gui2 import error_dialog, Dispatcher, question_dialog
from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.tag_list_editor import TagListEditor
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.icu import sort_key
from calibre.utils.config import test_eight_code
class EditMetadataAction(InterfaceAction):
@ -36,22 +34,8 @@ class EditMetadataAction(InterfaceAction):
md.addAction(_('Edit metadata in bulk'),
partial(self.edit_metadata, False, bulk=True))
md.addSeparator()
if test_eight_code:
dall = self.download_metadata
else:
dall = partial(self.download_metadata_old, False, covers=True)
dident = partial(self.download_metadata_old, False, covers=False)
dcovers = partial(self.download_metadata_old, False, covers=True,
set_metadata=False, set_social_metadata=False)
md.addAction(_('Download metadata and covers'), dall,
md.addAction(_('Download metadata and covers'), self.download_metadata,
Qt.ControlModifier+Qt.Key_D)
if not test_eight_code:
md.addAction(_('Download only metadata'), dident)
md.addAction(_('Download only covers'), dcovers)
md.addAction(_('Download only social metadata'),
partial(self.download_metadata_old, False, covers=False,
set_metadata=False, set_social_metadata=True))
self.metadata_menu = md
mb = QMenu()
@ -88,7 +72,7 @@ class EditMetadataAction(InterfaceAction):
_('No books selected'), show=True)
db = self.gui.library_view.model().db
ids = [db.id(row.row()) for row in rows]
from calibre.gui2.metadata.bulk_download2 import start_download
from calibre.gui2.metadata.bulk_download import start_download
start_download(self.gui, ids,
Dispatcher(self.metadata_downloaded))
@ -96,7 +80,7 @@ class EditMetadataAction(InterfaceAction):
if job.failed:
self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
return
from calibre.gui2.metadata.bulk_download2 import get_job_details
from calibre.gui2.metadata.bulk_download import get_job_details
id_map, failed_ids, failed_covers, all_failed, det_msg = \
get_job_details(job)
if all_failed:
@ -112,8 +96,9 @@ class EditMetadataAction(InterfaceAction):
show_copy_button = False
if failed_ids or failed_covers:
show_copy_button = True
num = len(failed_ids.union(failed_covers))
msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
' "Show details" to see which books.')%len(failed_ids)
' "Show details" to see which books.')%num
payload = (id_map, failed_ids, failed_covers)
from calibre.gui2.dialogs.message_box import ProceedNotification
@ -158,49 +143,6 @@ class EditMetadataAction(InterfaceAction):
self.apply_metadata_changes(id_map)
def download_metadata_old(self, checked, covers=True, set_metadata=True,
set_social_metadata=None):
rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) == 0:
d = error_dialog(self.gui, _('Cannot download metadata'),
_('No books selected'))
d.exec_()
return
db = self.gui.library_view.model().db
ids = [db.id(row.row()) for row in rows]
self.do_download_metadata(ids, covers=covers,
set_metadata=set_metadata,
set_social_metadata=set_social_metadata)
def do_download_metadata(self, ids, covers=True, set_metadata=True,
set_social_metadata=None):
m = self.gui.library_view.model()
db = m.db
if set_social_metadata is None:
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
from calibre.gui2.metadata.bulk_download import DoDownload
if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
x = _('covers') if covers and not set_metadata else _('metadata')
title = _('Downloading {0} for {1} book(s)').format(x, len(ids))
self._download_book_metadata = DoDownload(self.gui, title, db, ids,
get_covers=covers, set_metadata=set_metadata,
get_social_metadata=get_social_metadata)
m.stop_metadata_backup()
try:
self._download_book_metadata.exec_()
finally:
m.start_metadata_backup()
cr = self.gui.library_view.currentIndex().row()
x = self._download_book_metadata
if x.updated:
self.gui.library_view.model().refresh_ids(
x.updated, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
# }}}
def edit_metadata(self, checked, bulk=None):
@ -227,9 +169,7 @@ class EditMetadataAction(InterfaceAction):
list(range(self.gui.library_view.model().rowCount(QModelIndex())))
current_row = row_list.index(cr)
func = (self.do_edit_metadata if test_eight_code else
self.do_edit_metadata_old)
changed, rows_to_refresh = func(row_list, current_row)
changed, rows_to_refresh = self.do_edit_metadata(row_list, current_row)
m = self.gui.library_view.model()
@ -244,36 +184,6 @@ class EditMetadataAction(InterfaceAction):
m.current_changed(current, previous)
self.gui.tags_view.recount()
def do_edit_metadata_old(self, row_list, current_row):
changed = set([])
db = self.gui.library_view.model().db
while True:
prev = next_ = None
if current_row > 0:
prev = db.title(row_list[current_row-1])
if current_row < len(row_list) - 1:
next_ = db.title(row_list[current_row+1])
d = MetadataSingleDialog(self.gui, row_list[current_row], db,
prev=prev, next_=next_)
d.view_format.connect(lambda
fmt:self.gui.iactions['View'].view_format(row_list[current_row],
fmt))
ret = d.exec_()
d.break_cycles()
if ret != d.Accepted:
break
changed.add(d.id)
self.gui.library_view.model().refresh_ids(list(d.books_to_refresh))
if d.row_delta == 0:
break
current_row += d.row_delta
self.gui.library_view.set_current_row(current_row)
self.gui.library_view.scroll_to_row(current_row)
return changed, set()
def do_edit_metadata(self, row_list, current_row):
from calibre.gui2.metadata.single import edit_metadata
db = self.gui.library_view.model().db
@ -613,6 +523,7 @@ class EditMetadataAction(InterfaceAction):
self.applied_ids, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
self.gui.tags_view.recount()
self.apply_id_map = []
self.apply_pd = None

View File

@ -10,7 +10,7 @@ from PyQt4.Qt import QIcon, QMenu, Qt
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.preferences.main import Preferences
from calibre.gui2 import error_dialog
from calibre.constants import DEBUG
from calibre.constants import DEBUG, isosx
class PreferencesAction(InterfaceAction):
@ -19,7 +19,8 @@ class PreferencesAction(InterfaceAction):
def genesis(self):
pm = QMenu()
pm.addAction(QIcon(I('config.png')), _('Preferences'), self.do_config)
acname = _('Change calibre behavior') if isosx else _('Preferences')
pm.addAction(QIcon(I('config.png')), acname, self.do_config)
pm.addAction(QIcon(I('wizard.png')), _('Run welcome wizard'),
self.gui.run_wizard)
if not DEBUG:

View File

@ -60,7 +60,7 @@ class ViewAction(InterfaceAction):
def build_menus(self, db):
self.view_menu.clear()
self.view_menu.addAction(self.qaction)
self.view_menu.addAction(self.view_action)
self.view_menu.addAction(self.view_specific_action)
self.view_menu.addSeparator()
self.view_menu.addAction(self.action_pick_random)

Some files were not shown because too many files have changed in this diff.