Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-02-17 10:49:50 +00:00 · 2011-02-17 10:49:50 +00:00 · 8f83e34249
commit 8f83e34249
parent c8514f27ba eab629b1e2
32 changed files with 1389 additions and 638 deletions
--- a/resources/content_server/browse/browse.css
+++ b/resources/content_server/browse/browse.css
@ -464,5 +464,14 @@ h2.library_name {
    max-height: 50%;
 }

+.details a.details_category_link {
+    text-decoration: none;
+    color: blue
+}
+
+.details a.details_category_link:hover {
+    color: red
+}
+
 /* }}} */

--- a/resources/recipes/aprospect.recipe
+++ b/resources/recipes/aprospect.recipe
--- a/resources/recipes/credit_slips.recipe
+++ b/resources/recipes/credit_slips.recipe
@ -0,0 +1,35 @@
+#!/usr/bin/env  python
+__license__ = 'GPL 3'
+__copyright__ = 'zotzot'
+__docformat__ = 'restructuredtext en'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class CreditSlips(BasicNewsRecipe):
+    __license__ = 'GPL v3'
+    __author__ = 'zotzot'
+    language = 'en'
+    version = 1
+    title = u'Credit Slips.org'
+    publisher = u'Bankr-L'
+    category = u'Economic blog'
+    description = u'All things about credit.'
+    cover_url = 'http://bit.ly/hyZSTr'
+    oldest_article = 50
+    max_articles_per_feed = 100
+    use_embedded_content = True
+
+    feeds = [
+(u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml')
+]
+    conversion_options = {
+'comments': description,
+'tags': category,
+'language': 'en',
+'publisher': publisher
+}
+    extra_css = '''
+                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
+                img {float: left; margin-right: 0.5em;}
+                '''
--- a/resources/recipes/detroit_news.recipe
+++ b/resources/recipes/detroit_news.recipe
@ -0,0 +1,64 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+import re
+class AdvancedUserRecipe1297291961(BasicNewsRecipe):
+    title          = u'Detroit News'
+    language = 'en'
+    __author__ = 'DTM'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    no_stylesheets = True
+    conversion_options = {
+                                     'linearize_tables' : True,
+                                    }
+
+    feeds          = [
+                          (u'Headlines', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss&mime=xml'),
+                          (u'Nation/World', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss09&mime=xml'),
+                          (u'Metro/State', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss36&mime=xml'),
+                          (u'Wayne County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss01&mime=xml'),
+                          (u'Oakland County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss02&mime=xml'),
+                          (u'Macomb County', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss03&mime=xml'),
+                          (u'Livingston County', u'http://detnews.com/apps/pbcs.dll/section?category=rss04&mime=xml'),
+                          (u'Politics/Government', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss10&mime=xml'),
+                          (u'Editorials', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss07&mime=xml'),
+                          (u'Columnists', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss43&mime=xml'),
+                          (u'Charlie LeDuff', u'http://detnews.com/apps/pbcs.dll/section?category=rss54&mime=xml'),
+                          (u'Religion', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss11&mime=xml'),
+                          (u'Technology', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss12&mime=xml'),
+                          (u'Commuting', u'http://detnews.com/apps/pbcs.dll/section?category=rss05&mime=xml'),
+                          (u'Schools', u'http://detnews.com/apps/pbcs.dll/section?category=rss06&mime=xml'),
+                          (u'Obituaries', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss08&mime=xml'),
+                          (u'Autos Insider', u'http://detnews.com/apps/pbcs.dll/section?category=rss25&mime=xml'),
+                          (u'Drive', u'http://detnews.com/apps/pbcs.dll/section?category=rss26&mime=xml'),
+                          (u'Business', u'http://detnews.com/apps/pbcs.dll/section?category=rss21&mime=xml'),
+                          (u'Personal Finance', u'http://detnews.com/apps/pbcs.dll/section?category=rss23&mime=xml'),
+                          (u'Real Estate', u'http://detnews.com/apps/pbcs.dll/section?category=rss24&mime=xml'),
+                          (u'Movies', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss28&mime=xml'),
+                          (u'TV', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss40&mime=xml'),
+                          (u'Music/Nightlife', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss30&mime=xml'),
+                          (u'Celebrities', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss51&mime=xml'),
+                          (u'The Arts', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss27&mime=xml'),
+                          (u'Food', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss29&mime=xml'),
+                          (u'Homestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss31&mime=xml'),
+                          (u'The Green Life', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss53&mime=xml'),
+                          (u'Lifestyle', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss32&mime=xml'),
+                          (u'Health', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss34&mime=xml'),
+                          (u'Travel', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss52&mime=xml'),
+                          (u'Advice', u'http://www.detnews.com/apps/pbcs.dll/section?category=rss50&mime=xml'),
+                          (u'Pistons', u'http://detnews.com/apps/pbcs.dll/section?category=rss13&mime=xml'),
+                          (u'Lions', u'http://detnews.com/apps/pbcs.dll/section?category=rss14&mime=xml'),
+                          (u'Tigers', u'http://detnews.com/apps/pbcs.dll/section?category=rss15&mime=xml'),
+                          (u'Red Wings', u'http://detnews.com/apps/pbcs.dll/section?category=rss16&mime=xml'),
+                          (u'Michigan State', u'http://detnews.com/apps/pbcs.dll/section?category=rss18&mime=xml'),
+                          (u'University of Michigan', u'http://detnews.com/apps/pbcs.dll/section?category=rss17&mime=xml'),
+                          (u'Motor Sports', u'http://detnews.com/apps/pbcs.dll/section?category=rss20&mime=xml'),
+                          (u'Golf', u'http://detnews.com/apps/pbcs.dll/section?category=rss47&mime=xml'),
+                          (u'Outdoors', u'http://detnews.com/apps/pbcs.dll/section?category=rss19&mime=xml')
+                          ]
+
+    def print_version(self, url):
+        '''
+        Return the print-template variant of an article ``url``.
+
+        The path segment matched by the pattern below is replaced with the
+        ``&template=printart`` query parameter. A URL that contains no such
+        segment is returned unchanged rather than failing with
+        ``AttributeError`` on the missing match object.
+        '''
+        p = re.compile(r'(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
+        m = p.search(url)
+        if m is None:
+            return url
+        return url.replace(m.group(), '&template=printart')
+
--- a/resources/recipes/epl_talk.recipe
+++ b/resources/recipes/epl_talk.recipe
@ -0,0 +1,37 @@
+#!/usr/bin/env  python
+__license__ = 'GPL 3'
+__copyright__ = 'zotzot'
+__docformat__ = 'restructuredtext en'
+'''
+http://www.epltalk.com
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class EPLTalkRecipe(BasicNewsRecipe):
+    __license__ = 'GPL v3'
+    __author__ = u'The Gaffer'
+    language = 'en'
+    version = 1
+
+    title = u'EPL Talk'
+    publisher = u'The Gaffer'
+    publication_type = 'Blog'
+    category = u'Soccer'
+    description = u'News and Analysis from the English Premier League'
+    cover_url = 'http://bit.ly/hJxZPu'
+
+    oldest_article = 45
+    max_articles_per_feed = 150
+    use_embedded_content = True
+    remove_javascript = True
+    encoding = 'utf8'
+
+    remove_tags_after = [dict(name='div', attrs={'class':'pd-rating'})]
+
+    feeds = [(u'EPL Talk', u'http://feeds.feedburner.com/EPLTalk')]
+
+    extra_css = '''
+                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
+                img {float: left; margin-right: 0.5em;}
+                '''
--- a/resources/recipes/fan_graphs.recipe
+++ b/resources/recipes/fan_graphs.recipe
@ -0,0 +1,39 @@
+#!/usr/bin/env  python
+__license__  = 'GPL v3'
+__copyright__ = '2011 zotzot'
+__docformat__ = 'PEP8'
+'''
+www.fangraphs.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class FanGraphs(BasicNewsRecipe):
+    title = u'FanGraphs'
+    oldest_article = 21
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    #delay = 1
+    use_embedded_content = False
+    encoding = 'utf8'
+    publisher = 'Fangraphs'
+    category = 'Baseball'
+    language = 'en'
+    publication_type = 'Blog'
+
+    description = 'Baseball statistical analysis, graphs, and projections.'
+    __author__ = 'David Appelman'
+    cover_url = 'http://bit.ly/g0BTdQ'
+
+    feeds = [
+    (u'Fangraphs', u'http://feeds.feedburner.com/FanGraphs?format=xml'),
+    (u'Rotographs', u'http://www.wizardrss.com/feed/feeds.feedburner.com/RotoGraphs?format=xml'),
+    (u'Community', u'http://www.wizardrss.com/feed/www.fangraphs.com/community/?feed=rss2'),
+    (u'NotGraphs', u'http://www.wizardrss.com/feed/www.fangraphs.com/not/?feed=rss2')]
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+'''
--- a/resources/recipes/mediaindonesia.recipe
+++ b/resources/recipes/mediaindonesia.recipe
@ -0,0 +1,40 @@
+#!/usr/bin/env  python
+__license__ = 'GPL v3'
+__copyright__ = '2011, bakthariq AT gmail.com'
+'''
+m.mediaindonesia.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class Media(BasicNewsRecipe):
+    '''Recipe for Media Indonesia, built from the RSS feeds listed in ``feeds``.'''
+    title                 = u'Media Indonesia'
+    __author__            = 'bakthariq'
+    oldest_article        = 1
+    max_articles_per_feed = 500
+    timefmt               = ' [%a, %b %d, %Y]'
+    language              = 'id'
+    category              = 'News, Indonesia'
+    publisher             = 'Media Indonesia'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    description           = 'Indonesian Media Indonesia newsportal'
+    cover_url             = 'http://m.mediaindonesia.com/public/img/logo.gif'
+    # The documented BasicNewsRecipe option is ``remove_javascript``; the
+    # ``no_javascript`` spelling used previously is not one of its options.
+    remove_javascript     = True
+
+    remove_tags           = [dict(id=['atas','merah','putih']), dict(name='a')]
+    remove_tags_after     = [dict(id="putih")]
+
+    extra_css             = '''
+                            .judul {font-size: x-large;}
+                            .tgl {font-size: x-small;color:#333333;}
+                            .foto {font-size: x-small;}
+		            '''
+
+    feeds = [(u'Polhukam', u'http://www.mediaindonesia.com/rss/1/polhukam'), (u'Ekonomi dan Bisnis', u'http://www.mediaindonesia.com/rss/2/ekonomi-dan-bisnis'),
+(u'Internasional', u'http://www.mediaindonesia.com/rss/6/internasional'), (u'Olahraga', u'http://www.mediaindonesia.com/rss/3/olahraga'),(u'Sepakbola',
+u'http://www.mediaindonesia.com/rss/4/sepakbola'),(u'Megapolitan', u'http://www.mediaindonesia.com/rss/5/megapolitan'), (u'Sains dan Teknologi',
+u'http://www.mediaindonesia.com/rss/7/sains-dan-teknologi'), (u'Humaniora', u'http://www.mediaindonesia.com/rss/14/humaniora'), (u'Hiburan',
+u'http://www.mediaindonesia.com/rss/10/hiburan'), (u'Opini', u'http://www.mediaindonesia.com/rss/11/opini')]
+
--- a/resources/recipes/nrc.nl.recipe
+++ b/resources/recipes/nrc.nl.recipe
@ -21,8 +21,8 @@ class Pagina12(BasicNewsRecipe):
    country               = 'NL'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.nrc.nl/nrc.nl/images/logo_nrc.png'
-    extra_css             = """ 
-                                body{font-family: Georgia,serif } 
+    extra_css             = """
+                                body{font-family: Georgia,serif }
                                img{margin-bottom: 0.4em; display: block}
                                .bijschrift,.sectie{font-size: x-small}
                                .sectie{color: gray}
@ -38,10 +38,10 @@ class Pagina12(BasicNewsRecipe):
    keep_only_tags = [dict(attrs={'class':'uitstekendekeus'})]
    remove_tags    = [
                        dict(name=['meta','base','link','object','embed'])
-                       ,dict(attrs={'class':['reclamespace','tags-and-sharing']})
+                       ,dict(attrs={'class':['reclamespace','tags-and-sharing','sharing-is-caring']})
                     ]
    remove_attributes=['lang']
-    
+
    feeds = [
              (u'Voor nieuws', u'http://www.nrc.nl/nieuws/categorie/nieuws/rss.php'           )
             ,(u'Binnenland' , u'http://www.nrc.nl/nieuws/categorie/binnenland/rss.php'       )
@ -69,8 +69,8 @@ class Pagina12(BasicNewsRecipe):
                         del item[atit]
               else:
                   str = self.tag_to_string(item)
-                   item.replaceWith(str)            
+                   item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
-               item['alt'] = 'image'               
+               item['alt'] = 'image'
        return soup
--- a/resources/recipes/oregonian.recipe
+++ b/resources/recipes/oregonian.recipe
@ -0,0 +1,44 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = 'zotzot'
+__docformat__ = 'restructuredtext en'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Oregonian(BasicNewsRecipe):
+    title = u'The Oregonian'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    language = 'en'
+    __author__ = 'Zotzot'
+    description = 'Portland, Oregon local newspaper'
+    publisher = 'Advance Publications'
+    category = 'news, Portland'
+    cover_url = 'http://bit.ly/gUgxGd'
+    no_stylesheets = True
+    masthead_url = 'http://bit.ly/eocL70'
+    remove_tags = [dict(name='div', attrs={'class':['footer', 'content']})]
+    use_embedded_content = False
+    remove_tags_before = dict(id='article')
+    remove_tags_after = dict(id='article')
+    feeds = [
+#(u'Timbers', u'feed://blog.oregonlive.com/timbers_impact/atom.xml'),
+(u'News', u'http://blog.oregonlive.com/news_impact/atom.xml'),
+(u'Opinion', u'http://blog.oregonlive.com/opinion_impact/atom.xml'),
+(u'Living', u'http://blog.oregonlive.com/living_impact/atom.xml'),
+(u'Sports', u'http://blog.oregonlive.com/sports_impact/atom.xml'),
+(u'Business', u'http://blog.oregonlive.com/business_impact/atom.xml')]
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+'''
+
+
+    def get_article_url(self, article):
+        '''
+        Return the URL of ``article``, or None when it contains ``/video/``.
+
+        This has to live inside the ``Oregonian`` class body: written as a
+        module-level function it never overrides
+        ``BasicNewsRecipe.get_article_url`` and the video filter is dead code.
+        '''
+        url = BasicNewsRecipe.get_article_url(self, article)
+        # Pass a missing (None/empty) URL straight through instead of
+        # failing on the ``in`` test below.
+        if url and '/video/' in url:
+            return None
+        return url
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -8,6 +8,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.base import OEB_IMAGES

 # To archive plugins {{{
@ -94,22 +95,22 @@ class TXT2TXTZ(FileTypePlugin):
    file_types = set(['txt'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True
-    
+
    def _get_image_references(self, txt, base_dir):
        images = []
-        
+
        # Textile
        for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
            path = m.group('path')
            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                images.append(path)
-                
-        # Markdown inline        
+
+        # Markdown inline
        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
            path = m.group('path')
            if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                images.append(path)
-        
+
        # Markdown reference
        refs = {}
        for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
@ -122,19 +123,30 @@ class TXT2TXTZ(FileTypePlugin):

        # Remove duplicates
        return list(set(images))
-    
+
    def run(self, path_to_ebook):
        with open(path_to_ebook, 'rb') as ebf:
            txt = ebf.read()
        base_dir = os.path.dirname(path_to_ebook)
        images = self._get_image_references(txt, base_dir)
-        
+
        if images:
            # Create TXTZ and put file plus images inside of it.
            import zipfile
            of = self.temporary_file('_plugin_txt2txtz.txtz')
            txtz = zipfile.ZipFile(of.name, 'w')
+            # Add selected TXT file to archive.
            txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
+            # metadata.opf
+            if os.path.exists(os.path.join(base_dir, 'metadata.opf')):
+                txtz.write(os.path.join(base_dir, 'metadata.opf'), 'metadata.opf', zipfile.ZIP_DEFLATED)
+            else:
+                from calibre.ebooks.metadata.txt import get_metadata
+                with open(path_to_ebook, 'rb') as ebf:
+                    mi = get_metadata(ebf)
+                opf = metadata_to_opf(mi)
+                txtz.writestr('metadata.opf', opf, zipfile.ZIP_DEFLATED)
+            # images
            for image in images:
                txtz.write(os.path.join(base_dir, image), image)
            txtz.close()
@ -1018,3 +1030,10 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
        Email, Server, Plugins, Tweaks, Misc, TemplateFunctions]

 #}}}
+
+# New metadata download plugins {{{
+from calibre.ebooks.metadata.sources.google import GoogleBooks
+
+plugins += [GoogleBooks]
+
+# }}}
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -20,6 +20,7 @@ from calibre.ebooks.metadata.fetch import MetadataSource
 from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
                                 plugin_dir, OptionParser, prefs
 from calibre.ebooks.epub.fix import ePubFixer
+from calibre.ebooks.metadata.sources.base import Source

 platform = 'linux'
 if iswindows:
@ -493,6 +494,17 @@ def epub_fixers():
                    yield plugin
 # }}}

+# Metadata sources2 {{{
+def metadata_plugins(capabilities):
+    '''
+    Yield each initialized plugin that is a ``Source``, shares at least one
+    capability with ``capabilities`` and is not disabled.
+    '''
+    wanted = frozenset(capabilities)
+    for candidate in _initialized_plugins:
+        if not isinstance(candidate, Source):
+            continue
+        if not candidate.capabilities.intersection(wanted):
+            continue
+        if not is_disabled(candidate):
+            yield candidate
+
+# }}}
+
 # Initialize plugins {{{

 _initialized_plugins = []
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -24,6 +24,7 @@ from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile

 from PIL import Image as PILImage
+from lxml import etree

 if isosx:
    try:
@ -2515,19 +2516,17 @@ class ITUNES(DriverBase):
            fnames = zf_opf.namelist()
            opf = [x for x in fnames if '.opf' in x][0]
            if opf:
-                opf_raw = cStringIO.StringIO(zf_opf.read(opf))
-                soup = BeautifulSoup(opf_raw.getvalue())
-                opf_raw.close()
-
-                # Touch existing calibre timestamp
-                md = soup.find('metadata')
-                if md:
-                    ts = md.find('meta',attrs={'name':'calibre:timestamp'})
-                    if ts:
-                        timestamp = ts['content']
+                opf_tree = etree.fromstring(zf_opf.read(opf))
+                md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
+                if md_els:
+                    ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
+                    if ts is not None:
+                        timestamp = ts.get('content')
                        old_ts = parse_date(timestamp)
                        metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                                                   old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
+                        if DEBUG:
+                            self.log.info("   existing timestamp: %s" % metadata.timestamp)
                    else:
                        metadata.timestamp = now()
                        if DEBUG:
@ -2537,7 +2536,6 @@ class ITUNES(DriverBase):
                    if DEBUG:
                        self.log.warning("   missing <metadata> block in OPF file")
                        self.log.info("   add timestamp: %s" % metadata.timestamp)
-
                # Force the language declaration for iBooks 1.1
                #metadata.language = get_lang().replace('_', '-')

@ -2839,7 +2837,7 @@ class ITUNES(DriverBase):
    def _xform_metadata_via_plugboard(self, book, format):
        ''' Transform book metadata from plugboard templates '''
        if DEBUG:
-            self.log.info("  ITUNES._update_metadata_from_plugboard()")
+            self.log.info("  ITUNES._xform_metadata_via_plugboard()")

        if self.plugboard_func:
            pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@ -25,7 +25,7 @@ class APNXBuilder(object):
        with open(mobi_file_path, 'rb') as mf:
            ident = PdbHeaderReader(mf).identity()
        if ident != 'BOOKMOBI':
-            raise Exception(_('Not a valid MOBI file. Reports identity of %s' % ident))
+            raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)

        # Get the pages depending on the chosen parser
        pages = []
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -22,7 +22,7 @@ class KOBO(USBMS):
    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
    author = 'Timothy Legge and Kovid Goyal'
-    version = (1, 0, 7)
+    version = (1, 0, 9)

    dbversion = 0
    fwversion = 0
@ -124,9 +124,12 @@ class KOBO(USBMS):
                        if imagename is not None:
                            bl[idx].thumbnail = ImageWrapper(imagename)
                    if (ContentType != '6' and MimeType != 'Shortcover'):
-                        if self.update_metadata_item(bl[idx]):
-                            # print 'update_metadata_item returned true'
-                            changed = True
+                        if os.path.exists(self.normalize_path(os.path.join(prefix, lpath))):
+                            if self.update_metadata_item(bl[idx]):
+                                # print 'update_metadata_item returned true'
+                                changed = True
+                        else:
+                             debug_print("    Strange:  The file: ", prefix, lpath, " does mot exist!")
                    if lpath in playlist_map and \
                        playlist_map[lpath] not in bl[idx].device_collections:
                            bl[idx].device_collections.append(playlist_map[lpath])
@ -135,7 +138,13 @@ class KOBO(USBMS):
                        book =  Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576)
                    else:
                        try:
-                            book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
+                            if os.path.exists(self.normalize_path(os.path.join(prefix, lpath))):
+                                book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
+                            else:
+                                debug_print("    Strange:  The file: ", prefix, lpath, " does mot exist!")
+                                title = "FILE MISSING: " + title
+                                book =  Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576)
+
                        except:
                            debug_print("prefix: ", prefix, "lpath: ", lpath, "title: ", title, "authors: ", authors, \
                                        "mime: ", mime, "date: ", date, "ContentType: ", ContentType, "ImageID: ", ImageID)
@ -152,6 +161,10 @@ class KOBO(USBMS):
            return changed

        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
+        
+        # Decode text returned by the database as UTF-8, ignoring any bytes that cannot be decoded
+        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
+
        cursor = connection.cursor()

        #query = 'select count(distinct volumeId) from volume_shortcovers'
@ -221,6 +234,10 @@ class KOBO(USBMS):

        debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
+        
+        # Decode text returned by the database as UTF-8, ignoring any bytes that cannot be decoded
+        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
+
        cursor = connection.cursor()
        t = (ContentID,)
        cursor.execute('select ImageID from content where ContentID = ?', t)
@ -494,6 +511,10 @@ class KOBO(USBMS):
        # the last book from the collection the list of books is empty
        # and the removal of the last book would not occur
        connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
+        
+        # Decode text returned by the database as UTF-8, ignoring any bytes that cannot be decoded
+        connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
+
        cursor = connection.cursor()


--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -53,7 +53,7 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
 def force_encoding(raw, verbose, assume_utf8=False):
    from calibre.constants import preferred_encoding
    try:
-        chardet = detect(raw)
+        chardet = detect(raw[:1024*50])
    except:
        chardet = {'encoding':preferred_encoding, 'confidence':0}
    encoding = chardet['encoding']
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -568,11 +568,14 @@ class HTMLPreProcessor(object):
    def smarten_punctuation(self, html):
        from calibre.utils.smartypants import smartyPants
        from calibre.ebooks.chardet import substitute_entites
+        from calibre.ebooks.conversion.utils import HeuristicProcessor
+        preprocessor = HeuristicProcessor(self.extra_opts, self.log)
        from uuid import uuid4
        start = 'calibre-smartypants-'+str(uuid4())
        stop = 'calibre-smartypants-'+str(uuid4())
        html = html.replace('<!--', start)
        html = html.replace('-->', stop)
+        html = preprocessor.fix_nbsp_indents(html)
        html = smartyPants(html)
        html = html.replace(start, '<!--')
        html = html.replace(stop, '-->')
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -157,7 +157,7 @@ class HeuristicProcessor(object):

        ITALICIZE_STYLE_PATS = [
            r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
-            r'(?msu)(?<=[\s>])/(?P<words>[^/]+)/',
+            r'(?msu)(?<=[\s>])/(?P<words>[^/\*>]+)/',
            r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
            r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
            r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.ebooks.metadata.sources.base import Source
+
+class Amazon(Source):
+    '''
+    Metadata source plugin for Amazon. This class body only declares the
+    plugin's name, description, capabilities and touched fields.
+    '''
+
+    name = 'Amazon'
+    description = _('Downloads metadata from Amazon')
+
+    # Operations this source advertises
+    capabilities = frozenset(['identify', 'cover'])
+    # Metadata fields this source declares it may set
+    touched_fields = frozenset(['title', 'authors', 'isbn', 'pubdate',
+        'comments', 'cover_data'])
+
+
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)

 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -8,6 +10,12 @@ __docformat__ = 'restructuredtext en'
 import re

 from calibre.customize import Plugin
+from calibre.utils.logging import ThreadSafeLog, FileStream
+
+def create_log(ostream=None):
+    '''
+    Build a ``ThreadSafeLog`` at DEBUG level whose only output is a
+    ``FileStream`` wrapping ``ostream``.
+    '''
+    # NOTE(review): FileStream(None) presumably falls back to a default
+    # stream -- confirm in calibre.utils.logging
+    logger = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
+    logger.outputs = [FileStream(ostream)]
+    return logger

 class Source(Plugin):

@ -18,14 +26,47 @@ class Source(Plugin):

    result_of_identify_is_complete = True

-    def get_author_tokens(self, authors):
-        'Take a list of authors and return a list of tokens useful for a '
-        'AND search query'
-        # Leave ' in there for Irish names
-        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
-        for au in authors:
-            for tok in au.split():
-                yield pat.sub('', tok)
+    capabilities = frozenset()
+
+    touched_fields = frozenset()
+
+    # Utility functions {{{
+    def get_author_tokens(self, authors, only_first_author=True):
+        '''
+        Take a list of authors and yield tokens useful for an AND search
+        query. This function tries to yield tokens in first name, middle
+        names, last name order, by assuming that if a comma is in the author
+        name, the name is in "lastname, other names" form.
+
+        :param authors: list of author name strings. Nothing is yielded when
+                        it is empty or None.
+        :param only_first_author: if True, only the first entry of
+                                  ``authors`` is tokenized.
+
+        Tokens that are empty once punctuation has been stripped (for
+        example a stand-alone ``&``) are skipped, since an empty term is of
+        no use in a search query.
+        '''
+
+        if authors:
+            # Leave ' in there for Irish names
+            pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
+            if only_first_author:
+                authors = authors[:1]
+            for au in authors:
+                parts = au.split()
+                if ',' in au:
+                    # au probably in ln, fn form: move the first word
+                    # (taken to be the last name) to the end
+                    parts = parts[1:] + parts[:1]
+                for tok in parts:
+                    tok = pat.sub('', tok).strip()
+                    if tok:
+                        yield tok
+
+
+    def get_title_tokens(self, title):
+        '''
+        Yield the words of ``title`` that are useful for an AND search query.
+        Punctuation is replaced by spaces before splitting, and the
+        connectives "a", "and" and "the" (in any letter case) are left out.
+        Nothing is yielded for an empty or None title.
+        '''
+        if not title:
+            return
+        punctuation = re.compile(r'''[-,:;+!@#$%^&*(){}.`~"'\s\[\]/]''')
+        connectives = ('a', 'and', 'the')
+        for word in punctuation.sub(' ', title).split():
+            word = word.strip()
+            if not word or word.lower() in connectives:
+                continue
+            yield word

    def split_jobs(self, jobs, num):
        'Split a list of jobs into at most num groups, as evenly as possible'
@ -40,6 +81,10 @@ class Source(Plugin):
                gr.append(job)
        return [g for g in groups if g]

+    # }}}
+
+    # Metadata API {{{
+
    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
        '''
        Identify a book by its title/author/isbn/etc.
@ -59,3 +104,5 @@ class Source(Plugin):
        '''
        return None

+    # }}}
+
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)

 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -12,8 +14,9 @@ from threading import Thread

 from lxml import etree

-from calibre.ebooks.metadata.sources import Source
+from calibre.ebooks.metadata.sources.base import Source
 from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.date import parse_date, utcnow
 from calibre import browser, as_unicode

@ -38,7 +41,18 @@ subject        = XPath('descendant::dc:subject')
 description    = XPath('descendant::dc:description')
 language       = XPath('descendant::dc:language')

+def get_details(browser, url):
+    try:
+        raw = browser.open_novisit(url).read()
+    except Exception as e:
+        gc = getattr(e, 'getcode', lambda : -1)
+        if gc() != 403:
+            raise
+        # Google is throttling us, wait a little
+        time.sleep(2)
+        raw = browser.open_novisit(url).read()

+    return raw

 def to_metadata(browser, log, entry_):

@ -65,8 +79,8 @@ def to_metadata(browser, log, entry_):

    mi = Metadata(title_, authors)
    try:
-        raw = browser.open_novisit(id_url).read()
-        feed = etree.fromstring(raw)
+        raw = get_details(browser, id_url)
+        feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
@ -142,9 +156,13 @@ class Worker(Thread):
 class GoogleBooks(Source):

    name = 'Google Books'
+    description = _('Downloads metadata from Google Books')

-    def create_query(self, log, title=None, authors=None, identifiers={},
-            start_index=1):
+    capabilities = frozenset(['identify'])
+    touched_fields = frozenset(['title', 'authors', 'isbn', 'tags', 'pubdate',
+        'comments', 'publisher', 'author_sort']) # language currently disabled
+
+    def create_query(self, log, title=None, authors=None, identifiers={}):
        BASE_URL = 'http://books.google.com/books/feeds/volumes?'
        isbn = identifiers.get('isbn', None)
        q = ''
@ -153,11 +171,14 @@ class GoogleBooks(Source):
        elif title or authors:
            def build_term(prefix, parts):
                return ' '.join('in'+prefix + ':' + x for x in parts)
-            if title is not None:
-                q += build_term('title', title.split())
-            if authors:
-                q += ('+' if q else '')+build_term('author',
-                        self.get_author_tokens(authors))
+            title_tokens = list(self.get_title_tokens(title))
+            if title_tokens:
+                q += build_term('title', title_tokens)
+            author_tokens = self.get_author_tokens(authors,
+                    only_first_author=True)
+            if author_tokens:
+                q += ('+' if q else '') + build_term('author',
+                        author_tokens)

        if isinstance(q, unicode):
            q = q.encode('utf-8')
@ -166,7 +187,7 @@ class GoogleBooks(Source):
        return BASE_URL+urlencode({
            'q':q,
            'max-results':20,
-            'start-index':start_index,
+            'start-index':1,
            'min-viewability':'none',
            })

@ -182,7 +203,8 @@ class GoogleBooks(Source):

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
-            feed = etree.fromstring(raw, parser=parser)
+            feed = etree.fromstring(xml_to_unicode(raw,
+                strip_encoding_pats=True)[0], parser=parser)
            entries = entry(feed)
        except Exception, e:
            log.exception('Failed to parse identify results')
@ -191,25 +213,33 @@ class GoogleBooks(Source):

        groups = self.split_jobs(entries, 5) # At most 5 threads
        if not groups:
-            return
+            return None
        workers = [Worker(log, entries, abort, result_queue) for entries in
                groups]

        if abort.is_set():
-            return
+            return None

        for worker in workers: worker.start()

        has_alive_worker = True
        while has_alive_worker and not abort.is_set():
+            time.sleep(0.1)
            has_alive_worker = False
            for worker in workers:
                if worker.is_alive():
                    has_alive_worker = True
-            time.sleep(0.1)

        return None

-
-
-
+if __name__ == '__main__':
+    # To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
+    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
+            isbn_test)
+    test_identify_plugin(GoogleBooks.name,
+        [
+            (
+                {'title': 'Great Expectations', 'authors':['Charles Dickens']},
+                [isbn_test('9781607541592')]
+            ),
+    ])
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, tempfile
+from Queue import Queue, Empty
+from threading import Event
+
+
+from calibre.customize.ui import metadata_plugins
+from calibre import prints
+from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.sources.base import create_log
+
+def isbn_test(isbn):
+    isbn_ = check_isbn(isbn)
+
+    def test(mi):
+        misbn = check_isbn(mi.isbn)
+        return misbn and misbn == isbn_
+
+    return test
+
+def test_identify_plugin(name, tests):
+    '''
+    :param name: Plugin name
+    :param tests: List of 2-tuples. Each 2-tuple is of the form (args,
+                  test_funcs). args is a dict of keyword arguments to pass to
+                  the identify method. test_funcs are callables that accept a
+                  Metadata object and return True iff the object passes the
+                  test.
+    '''
+    plugin = None
+    for x in metadata_plugins(['identify']):
+        if x.name == name:
+            plugin = x
+            break
+    prints('Testing the identify function of', plugin.name)
+
+    tdir = tempfile.gettempdir()
+    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
+    log = create_log(open(lf, 'wb'))
+    abort = Event()
+    prints('Log saved to', lf)
+
+    for kwargs, test_funcs in tests:
+        prints('Running test with:', kwargs)
+        rq = Queue()
+        args = (log, rq, abort)
+        err = plugin.identify(*args, **kwargs)
+        if err is not None:
+            prints('identify returned an error for args', args)
+            prints(err)
+            break
+
+        results = []
+        while True:
+            try:
+                results.append(rq.get_nowait())
+            except Empty:
+                break
+
+        prints('Found', len(results), 'matches:')
+
+        for mi in results:
+            prints(mi)
+            prints('\n\n')
+
+        match_found = None
+        for mi in results:
+            test_failed = False
+            for tfunc in test_funcs:
+                if not tfunc(mi):
+                    test_failed = True
+                    break
+            if not test_failed:
+                match_found = mi
+                break
+
+        if match_found is None:
+            prints('ERROR: No results that passed all tests were found')
+            prints('Log saved to', lf)
+            raise SystemExit(1)
+
+    if os.stat(lf).st_size > 10:
+        prints('There were some errors, see log', lf)
+
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -1818,7 +1818,7 @@ class MobiWriter(object):
            text = text.strip()
            if not isinstance(text, unicode):
                text = text.decode('utf-8', 'replace')
-            text = text.encode('utf-8')
+            text = normalize(text).encode('utf-8')
        else :
            text = "(none)".encode('utf-8')
        return text
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -151,8 +151,8 @@ class PML_HTMLizer(object):
    def prepare_pml(self, pml):
        # Give Chapters the form \\*='text'text\\*. This is used for generating
        # the TOC later.
-        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
-        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+        pml = re.sub(r'(?msu)(?P<c>\\x)(?P<text>.*?)(?P=c)', lambda match: '%s="%s"%s%s' % (match.group('c'), self.strip_pml(match.group('text')), match.group('text'), match.group('c')), pml)
+        pml = re.sub(r'(?msu)(?P<c>\\X[0-4])(?P<text>.*?)(?P=c)', lambda match: '%s="%s"%s%s' % (match.group('c'), self.strip_pml(match.group('text')), match.group('text'), match.group('c')), pml)

        # Remove comments
        pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
@ -190,9 +190,10 @@ class PML_HTMLizer(object):
        pml = re.sub(r'\\a\d\d\d', '', pml)
        pml = re.sub(r'\\U\d\d\d\d', '', pml)
        pml = re.sub(r'\\.', '', pml)
-        pml.replace('\r\n', ' ')
-        pml.replace('\n', ' ')
-        pml.replace('\r', ' ')
+        pml = pml.replace('\r\n', ' ')
+        pml = pml.replace('\n', ' ')
+        pml = pml.replace('\r', ' ')
+        pml = pml.strip()

        return pml

--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@ -8,18 +8,20 @@ __docformat__ = 'restructuredtext en'
 import os
 from functools import partial

-from PyQt4.Qt import QPixmap, QMenu
+from PyQt4.Qt import QPixmap, QMenu, QTimer


 from calibre.gui2 import error_dialog, choose_files, \
    choose_dir, warning_dialog, info_dialog
 from calibre.gui2.dialogs.add_empty_book import AddEmptyBookDialog
+from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.gui2.widgets import IMAGE_EXTENSIONS
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.utils.filenames import ascii_filename
 from calibre.constants import preferred_encoding, filesystem_encoding
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2 import config
+from calibre.ebooks.metadata import MetaInformation

 class AddAction(InterfaceAction):

@ -95,7 +97,6 @@ class AddAction(InterfaceAction):
        dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db, author)
        if dlg.exec_() == dlg.Accepted:
            num = dlg.qty_to_add
-            from calibre.ebooks.metadata import MetaInformation
            for x in xrange(num):
                mi = MetaInformation(_('Unknown'), dlg.selected_authors)
                self.gui.library_view.model().db.import_book(mi, [])
@ -105,27 +106,45 @@ class AddAction(InterfaceAction):
            self.gui.tags_view.recount()

    def add_isbns(self, books, add_tags=[]):
-        from calibre.ebooks.metadata import MetaInformation
-        ids = set([])
-        db = self.gui.library_view.model().db
+        self.isbn_books = list(books)
+        self.add_by_isbn_ids = set()
+        self.isbn_add_tags = add_tags
+        QTimer.singleShot(10, self.do_one_isbn_add)
+        self.isbn_add_dialog = ProgressDialog(_('Adding'),
+                _('Creating book records from ISBNs'), max=len(books),
+                cancelable=False, parent=self.gui)
+        self.isbn_add_dialog.exec_()
+
+    def do_one_isbn_add(self):
+        try:
+            db = self.gui.library_view.model().db
+
+            try:
+                x = self.isbn_books.pop(0)
+            except IndexError:
+                self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
+                self.isbn_add_dialog.accept()
+                orig = config['overwrite_author_title_metadata']
+                config['overwrite_author_title_metadata'] = True
+                try:
+                    self.gui.iactions['Edit Metadata'].do_download_metadata(
+                            self.add_by_isbn_ids)
+                finally:
+                    config['overwrite_author_title_metadata'] = orig
+                return
+

-        for x in books:
            mi = MetaInformation(None)
            mi.isbn = x['isbn']
-            if x['path'] is not None:
-                ids.add(db.import_book(mi, [x['path']]))
-            else:
-                ids.add(db.import_book(mi, []))
-        self.gui.library_view.model().books_added(len(books))
-        orig = config['overwrite_author_title_metadata']
-        config['overwrite_author_title_metadata'] = True
-        try:
-            self.gui.iactions['Edit Metadata'].do_download_metadata(ids)
-        finally:
-            config['overwrite_author_title_metadata'] = orig
-        if add_tags and ids:
-            db.bulk_modify_tags(ids, add=add_tags)
-
+            if self.isbn_add_tags:
+                mi.tags = list(self.isbn_add_tags)
+            fmts = [] if x['path'] is None else [x['path']]
+            self.add_by_isbn_ids.add(db.import_book(mi, fmts))
+            self.isbn_add_dialog.value += 1
+            QTimer.singleShot(10, self.do_one_isbn_add)
+        except:
+            self.isbn_add_dialog.accept()
+            raise

    def files_dropped(self, paths):
        to_device = self.gui.stack.currentIndex() != 0
--- a/src/calibre/gui2/actions/device.py
+++ b/src/calibre/gui2/actions/device.py
@ -7,13 +7,14 @@ __docformat__ = 'restructuredtext en'

 from functools import partial

-from PyQt4.Qt import QToolButton, QMenu, pyqtSignal, QIcon
+from PyQt4.Qt import QToolButton, QMenu, pyqtSignal, QIcon, QTimer

 from calibre.gui2.actions import InterfaceAction
 from calibre.utils.smtp import config as email_config
 from calibre.constants import iswindows, isosx
 from calibre.customize.ui import is_disabled
 from calibre.devices.bambook.driver import BAMBOOK
+from calibre.gui2 import info_dialog

 class ShareConnMenu(QMenu): # {{{

@ -169,5 +170,20 @@ class ConnectShareAction(InterfaceAction):
        if self.gui.content_server is None:
           self.gui.start_content_server()
        else:
-            self.gui.content_server.exit()
-            self.gui.content_server = None
+            self.gui.content_server.threaded_exit()
+            self.stopping_msg = info_dialog(self.gui, _('Stopping'),
+                    _('Stopping server, this could take upto a minute, please wait...'),
+                    show_copy_button=False)
+            QTimer.singleShot(1000, self.check_exited)
+
+    def check_exited(self):
+        if self.gui.content_server.is_running:
+            QTimer.singleShot(20, self.check_exited)
+            if not self.stopping_msg.isVisible():
+                self.stopping_msg.exec_()
+            return
+
+
+        self.gui.content_server = None
+        self.stopping_msg.accept()
+
--- a/src/calibre/gui2/preferences/server.py
+++ b/src/calibre/gui2/preferences/server.py
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 import time

 from PyQt4.Qt import Qt, QUrl, QDialog, QSize, QVBoxLayout, QLabel, \
-    QPlainTextEdit, QDialogButtonBox
+    QPlainTextEdit, QDialogButtonBox, QTimer

 from calibre.gui2.preferences import ConfigWidgetBase, test_widget
 from calibre.gui2.preferences.server_ui import Ui_Form
@ -16,7 +16,8 @@ from calibre.utils.search_query_parser import saved_searches
 from calibre.library.server import server_config
 from calibre.utils.config import ConfigProxy
 from calibre.gui2 import error_dialog, config, open_url, warning_dialog, \
-        Dispatcher
+        Dispatcher, info_dialog
+from calibre import as_unicode

 class ConfigWidget(ConfigWidgetBase, Ui_Form):

@ -67,25 +68,36 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):

    def start_server(self):
        self.set_server_options()
-        from calibre.library.server.main import start_threaded_server
-        self.server = start_threaded_server(self.db, server_config().parse())
-        while not self.server.is_running and self.server.exception is None:
+        self.gui.start_content_server(check_started=False)
+        while not self.gui.content_server.is_running and self.gui.content_server.exception is None:
            time.sleep(1)
-        if self.server.exception is not None:
+        if self.gui.content_server.exception is not None:
            error_dialog(self, _('Failed to start content server'),
-                         unicode(self.server.exception)).exec_()
+                    as_unicode(self.gui.content_server.exception)).exec_()
            return
        self.start_button.setEnabled(False)
        self.test_button.setEnabled(True)
        self.stop_button.setEnabled(True)

    def stop_server(self):
-        from calibre.library.server.main import stop_threaded_server
-        stop_threaded_server(self.server)
-        self.server = None
+        self.gui.content_server.threaded_exit()
+        self.stopping_msg = info_dialog(self, _('Stopping'),
+                _('Stopping server, this could take upto a minute, please wait...'),
+                show_copy_button=False)
+        QTimer.singleShot(500, self.check_exited)
+
+    def check_exited(self):
+        if self.gui.content_server.is_running:
+            QTimer.singleShot(20, self.check_exited)
+            if not self.stopping_msg.isVisible():
+                self.stopping_msg.exec_()
+            return
+
+        self.gui.content_server = None
        self.start_button.setEnabled(True)
        self.test_button.setEnabled(False)
        self.stop_button.setEnabled(False)
+        self.stopping_msg.accept()

    def test_server(self):
        open_url(QUrl('http://127.0.0.1:'+str(self.opt_port.value())))
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -307,7 +307,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
    def esc(self, *args):
        self.search.clear()

-    def start_content_server(self):
+    def start_content_server(self, check_started=True):
        from calibre.library.server.main import start_threaded_server
        from calibre.library.server import server_config
        self.content_server = start_threaded_server(
@ -315,7 +315,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
        self.content_server.state_callback = Dispatcher(
                self.iactions['Connect Share'].content_server_state_changed)
        self.content_server.state_callback(True)
-        self.test_server_timer = QTimer.singleShot(10000, self.test_server)
+        if check_started:
+            QTimer.singleShot(10000, self.test_server)

    def resizeEvent(self, ev):
        MainWindow.resizeEvent(self, ev)
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1481,23 +1481,36 @@ class EPUB_MOBI(CatalogPlugin):
            current_author = authors[0]
            for (i,author) in enumerate(authors):
                if author != current_author and i:
-                    # Exit if author matches previous, but author_sort doesn't match
                    if author[0] == current_author[0]:
-                        error_msg = _('''
-Inconsistent Author Sort values for Author '{0}':
-'{1}' <> '{2}',
-unable to build catalog.\n
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
-                        self.opts.log.warn('\n*** Metadata error ***')
-                        self.opts.log.warn(error_msg)
+                        if self.opts.fmt == 'mobi':
+                            # Exit if building MOBI
+                            error_msg = _(
+'''Inconsistent Author Sort values for
+Author '{0}':
+'{1}' <> '{2}'
+Unable to build MOBI catalog.\n
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog, then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
+                            self.opts.log.warn('\n*** Metadata error ***')
+                            self.opts.log.warn(error_msg)
+
+                            self.error.append('Author Sort mismatch')
+                            self.error.append(error_msg)
+                            return False
+                        else:
+                            # Warning if building non-MOBI
+                            if not self.error:
+                                self.error.append('Author Sort mismatch')
+
+                            error_msg = _(
+'''Warning: inconsistent Author Sort values for
+Author '{0}':
+'{1}' <> '{2}'\n''').format(author[0],author[1],current_author[1])
+                            self.opts.log.warn('\n*** Metadata warning ***')
+                            self.opts.log.warn(error_msg)
+                            self.error.append(error_msg)

-                        self.error.append('Metadata error')
-                        self.error.append(error_msg)
-                        return False
                    current_author = author

-
            self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort)

            # Build the unique_authors set from existing data
@ -2135,7 +2148,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                if author_count == 1:
                    divOpeningTag.insert(dotc, pBookTag)
                    dotc += 1
-                else:
+                elif divRunningTag:
                    divRunningTag.insert(drtc,pBookTag)
                    drtc += 1

--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@ -120,6 +120,8 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,

        self.set_database(db)

+        st = 0.1 if opts.develop else 1
+
        cherrypy.config.update({
            'log.screen'             : opts.develop,
            'engine.autoreload_on'   : getattr(opts,
@ -131,6 +133,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
            'server.socket_port'     : opts.port,
            'server.socket_timeout'  : opts.timeout, #seconds
            'server.thread_pool'     : opts.thread_pool, # number of threads
+            'server.shutdown_timeout': st, # seconds per CherryPy docs (was noted as minutes) -- TODO confirm
        })
        if embedded or wsgi:
            cherrypy.config.update({'engine.SIGHUP'          : None,
@ -241,4 +244,9 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
            except:
                pass

+    def threaded_exit(self):
+        from threading import Thread
+        t = Thread(target=self.exit)
+        t.daemon = True
+        t.start()

--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@ -584,7 +584,7 @@ class BrowseServer(object):
                title=_('Books in') + " " +category_name,
                script='booklist(%s);'%hide_sort, main=html)

-    def browse_get_book_args(self, mi, id_):
+    def browse_get_book_args(self, mi, id_, add_category_links=False):
        fmts = self.db.formats(id_, index_is_id=True)
        if not fmts:
            fmts = ''
@ -596,11 +596,43 @@ class BrowseServer(object):
            fmt = None
        args = {'id':id_, 'mi':mi,
                }
+        ccache = self.categories_cache() if add_category_links else {}
        for key in mi.all_field_keys():
            val = mi.format_field(key)[1]
            if not val:
                val = ''
-            args[key] = xml(val, True)
+            if add_category_links:
+                added_key = False
+                if val and key in ('authors', 'publisher', 'series', 'tags'):
+                    categories = mi.get(key)
+                    if isinstance(categories, basestring):
+                        categories = [categories]
+                    dbtags = []
+                    for category in categories:
+                        dbtag = None
+                        for tag in ccache[key]:
+                            if tag.name == category:
+                                dbtag = tag
+                                break
+                        dbtags.append(dbtag)
+                    if None not in dbtags:
+                        vals = []
+                        for tag in dbtags:
+                            tval = ('<a title="Browse books by {3}: {0}"'
+                            ' href="{1}" class="details_category_link">{2}</a>')
+                            href='/browse/matches/%s/%s' % \
+                            (quote(tag.category), quote(str(tag.id)))
+                            vals.append(tval.format(xml(tag.name, True),
+                                xml(href, True),
+                                xml(val if len(dbtags) == 1 else tag.name),
+                                xml(key, True)))
+                        join = ' &amp; ' if key == 'authors' else ', '
+                        args[key] = join.join(vals)
+                        added_key = True
+                if not added_key:
+                    args[key] = xml(val, True)
+            else:
+                args[key] = xml(val, True)
        fname = quote(ascii_filename(args['title']) + ' - ' +
                ascii_filename(args['authors']))
        return args, fmt, fmts, fname
@ -674,7 +706,8 @@ class BrowseServer(object):
        except:
            return _('This book has been deleted')
        else:
-            args, fmt, fmts, fname = self.browse_get_book_args(mi, id_)
+            args, fmt, fmts, fname = self.browse_get_book_args(mi, id_,
+                    add_category_links=True)
            args['formats'] = ''
            if fmts:
                ofmts = [u'<a href="{4}/get/{0}/{1}_{2}.{0}" title="{3}">{3}</a>'\
@ -690,8 +723,9 @@ class BrowseServer(object):
                if m['is_custom'] and field not in displayed_custom_fields:
                    continue
                if m['datatype'] == 'comments' or field == 'comments':
-                    comments.append((m['name'], comments_to_html(mi.get(field,
-                        ''))))
+                    val = mi.get(field, '')
+                    if val and val.strip():
+                        comments.append((m['name'], comments_to_html(val)))
                    continue
                if field in ('title', 'formats') or not args.get(field, False) \
                        or not m['name']:
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/utils/logging.py
+++ b/src/calibre/utils/logging.py
@ -10,17 +10,19 @@ INFO  = 1
 WARN  = 2
 ERROR = 3

-import sys, traceback
+import sys, traceback, cStringIO
 from functools import partial
-
+from threading import RLock



 class Stream(object):

-    def __init__(self, stream):
+    def __init__(self, stream=None):
        from calibre import prints
        self._prints = partial(prints, safe_encode=True)
+        if stream is None:
+            stream = cStringIO.StringIO()
        self.stream = stream

    def flush(self):
@ -50,6 +52,15 @@ class ANSIStream(Stream):
    def flush(self):
        self.stream.flush()

+class FileStream(Stream):
+
+    def __init__(self, stream=None):
+        Stream.__init__(self, stream)
+
+    def prints(self, level, *args, **kwargs):
+        kwargs['file'] = self.stream
+        self._prints(*args, **kwargs)
+
 class HTMLStream(Stream):

    def __init__(self, stream=sys.stdout):
@ -103,4 +114,14 @@ class Log(object):
    def __call__(self, *args, **kwargs):
        self.prints(INFO, *args, **kwargs)

+class ThreadSafeLog(Log):
+
+    def __init__(self, level=Log.INFO):
+        Log.__init__(self, level=level)
+        self._lock = RLock()
+
+    def prints(self, *args, **kwargs):
+        with self._lock:
+            Log.prints(self, *args, **kwargs)
+
 default_log = Log()