Sync to trunk.

John Schember 2010-02-07 09:18:48 -05:00
commit 3afcb3b2a8
34 changed files with 775 additions and 101 deletions


@@ -27,7 +27,7 @@ p.tags {
 p.description {
     text-align:left;
-    font-style:italic;
+    font-style:normal;
     margin-top: 0em;
 }
@@ -55,6 +55,14 @@ p.author_index {
     text-indent: 0em;
 }
+p.series {
+    text-align: left;
+    margin-top:0px;
+    margin-bottom:0px;
+    margin-left:2em;
+    text-indent:-2em;
+}
 p.read_book {
     text-align:left;
     margin-top:0px;

Six binary image files added (not shown): 1.3 KiB, 764 B, 640 B, 816 B, 810 B, 783 B.


@ -0,0 +1,45 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ZiveRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'Abelturd'
language = 'sk'
version = 1
title = u'ZIVE.sk'
publisher = u''
category = u'News, Newspaper'
description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
encoding = 'UTF-8'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
feeds = []
feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'))
preprocess_regexps = [
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
remove_tags = []
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),]
extra_css = '''
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
'''


@ -0,0 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.digitalspy.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DigitalSpyUK(BasicNewsRecipe):
title = 'Digital Spy - UK Edition'
__author__ = 'Darko Miletic'
description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
publisher = 'Digital Spy Limited.'
category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'en_GB'
remove_empty_feeds = True
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['link'])]
remove_attributes = ['height','width']
keep_only_tags = [dict(name='div',attrs={'id':'article'})]
feeds = [
(u'News' , u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml' )
,(u'Big Brother' , u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml' )
,(u'Entertainment' , u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml')
,(u'General' , u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml' )
,(u'Media' , u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml' )
]


@ -0,0 +1,38 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
elcomercio.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElComercio(BasicNewsRecipe):
title = 'El Comercio '
__author__ = 'Darko Miletic'
description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
publisher = 'GRUPO EL COMERCIO C.A.'
category = 'news, Ecuador, politics'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'es'
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,40 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gizmodo.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gizmodo(BasicNewsRecipe):
title = 'Gizmodo'
__author__ = 'Darko Miletic'
description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
publisher = 'gizmodo.com'
category = 'news, IT, Internet, gadgets'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
remove_tags = [dict(name='div',attrs={'class':'feedflare'})]
remove_tags_after = dict(name='div',attrs={'class':'feedflare'})
feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
     remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
         'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
         'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
-        'mailingListTout', 'partnerCenter', 'pageFooter']),
+        'mailingListTout', 'partnerCenter', 'pageFooter',
+        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
         dict(name='iframe')]
     extra_css = '''
                 a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }


@ -0,0 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class SmeRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'Abelturd'
language = 'cz'
version = 1
title = u'iLiteratura.cz'
publisher = u''
category = u'News, Newspaper'
description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
feeds = []
feeds.append((u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp'))
keep_only_tags = []
remove_tags = [dict(name='table'),dict(name='h3')]
preprocess_regexps = [
(re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
def print_version(self, url):
m = re.search('(?<=ID=)[0-9]*', url)
return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk'
extra_css = '''
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
'''


@@ -4,7 +4,7 @@ class Metro_Montreal(BasicNewsRecipe):
     title = u'M\xe9tro Montr\xe9al'
     __author__ = 'Jerry Clapperton'
-    description = 'Le quotidien le plus branché sur le monde'
+    description = 'Le quotidien le plus branch\xe9 sur le monde'
     language = 'fr'
     oldest_article = 7
@@ -16,7 +16,7 @@ class Metro_Montreal(BasicNewsRecipe):
     extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
     remove_tags = [dict(attrs={'id':'buttons'})]
     feeds = [
        (u"L'info", u'http://journalmetro.com/linfo/rss'),
        (u'Monde', u'http://journalmetro.com/monde/rss'),
@@ -26,4 +26,4 @@ class Metro_Montreal(BasicNewsRecipe):
    ]
    def print_version(self, url):
        return url.replace('article', 'ArticlePrint') + '?language=fr'


@ -0,0 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nst.com.my
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Newstraitstimes(BasicNewsRecipe):
title = 'New Straits Times from Malaysia'
__author__ = 'Darko Miletic'
description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
publisher = 'nst.com.my'
category = 'news, politics, Malaysia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'en'
masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['link','table'])]
keep_only_tags = dict(name='div',attrs={'id':'haidah'})
feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]


@@ -1,13 +1,12 @@
 __license__ = 'GPL v3'
 __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 pagina12.com.ar
 '''
-import time
-from calibre import strftime
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Pagina12(BasicNewsRecipe):
     title = 'Pagina - 12'
@@ -22,7 +21,8 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content = False
     language = 'es'
     remove_empty_feeds = True
-    extra_css = ' body{font-family: sans-serif} '
+    masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '

     conversion_options = {
                           'comment' : description
@@ -52,7 +52,11 @@ class Pagina12(BasicNewsRecipe):
         return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')

     def get_cover_url(self):
-        imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
-        weekday = time.localtime().tm_wday
-        return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
+        rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
+        rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
+        soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
+        for image in soup.findAll('img',alt=True):
+            if image['alt'].startswith('Tapa de la fecha'):
+                return image['src']
+        return None


@@ -31,7 +31,7 @@ class PeopleMag(BasicNewsRecipe):
     keep_only_tags = [
-        dict(name='div', attrs={'class': 'panel_news_article_main'}),
+        dict(name='div', attrs={'class':'article_content'}),
         dict(name='div', attrs={'class': 'headline'}),
         dict(name='div', attrs={'class': 'post'}),
@@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
         dict(name='div', attrs={'class':'sharelinkcont'}),
         dict(name='div', attrs={'class':'categories'}),
         dict(name='ul', attrs={'class':'categories'}),
+        dict(name='div', attrs={'class':'related_content'}),
         dict(name='div', attrs={'id':'promo'}),
         dict(name='div', attrs={'class':'linksWrapper'}),
         dict(name='p', attrs={'class':'tag tvnews'}),


@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
readitlaterlist.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'Read It Later'
__author__ = 'Darko Miletic'
description = '''Personalized news feeds. Go to readitlaterlist.com to
setup up your news. Fill in your account
username, and optionally you can add password.'''
publisher = 'readitlater.com'
category = 'news, custom'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
INDEX = u'http://readitlaterlist.com'
LOGIN = INDEX + u'/l'
feeds = [(u'Unread articles' , INDEX + u'/unread')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
br.open(self.LOGIN)
br.select_form(nr=0)
br['feed_id'] = self.username
if self.password is not None:
br['password'] = self.password
br.submit()
return br
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul',attrs={'id':'list'})
for item in ritem.findAll('li'):
description = ''
atag = item.find('a',attrs={'class':'text'})
if atag and atag.has_key('href'):
url = self.INDEX + atag['href']
title = self.tag_to_string(item.div)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds


@ -1,22 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class The_Gazette(BasicNewsRecipe):
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
title = u'The Gazette'
__author__ = 'Jerry Clapperton'
description = 'Montreal news in English'
language = 'en_CA'
oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]


@@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
     oldest_article = 7
     max_articles_per_feed = 100
+    no_stylesheets = True

     remove_tags = [
         dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
@@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
             ('Economy', 'http://www.tnr.com/rss/articles/Economy'),
             ('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
             ('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
-            ('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
+            ('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
             ('World', 'http://www.tnr.com/rss/articles/World'),
             ('Film', 'http://www.tnr.com/rss/articles/Film'),
             ('Books', 'http://www.tnr.com/rss/articles/books'),
+            ('The Book', 'http://www.tnr.com/rss/book'),
+            ('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
             ('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
             ('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
             ('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
+            ('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
             ('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
             ('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
             ('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
@@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
     def print_version(self, url):
         return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')


@ -0,0 +1,53 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
db.tidbits.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TidBITS(BasicNewsRecipe):
title = 'TidBITS: Mac News for the Rest of Us'
__author__ = 'Darko Miletic'
description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
publisher = 'TidBITS Publishing Inc.'
category = 'news, Apple, Macintosh, IT, Internet'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'en'
remove_empty_feeds = True
masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
remove_tags = [dict(name='small')]
remove_tags_after = dict(name='small')
feeds = [
(u'Business Apps' , u'http://db.tidbits.com/feeds/business.rss' )
,(u'Entertainment' , u'http://db.tidbits.com/feeds/entertainment.rss')
,(u'External Links' , u'http://db.tidbits.com/feeds/links.rss' )
,(u'Home Mac' , u'http://db.tidbits.com/feeds/home.rss' )
,(u'Inside TidBITS' , u'http://db.tidbits.com/feeds/inside.rss' )
,(u'iPod & iPhone' , u'http://db.tidbits.com/feeds/ipod-iphone.rss' )
,(u'Just for Fun' , u'http://db.tidbits.com/feeds/fun.rss' )
,(u'Macs & Mac OS X' , u'http://db.tidbits.com/feeds/macs.rss' )
,(u'Media Creation' , u'http://db.tidbits.com/feeds/creative.rss' )
,(u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss' )
,(u'Opinion & Editorial' , u'http://db.tidbits.com/feeds/opinion.rss' )
,(u'Support & Problem Solving' , u'http://db.tidbits.com/feeds/support.rss' )
,(u'Safe Computing' , u'http://db.tidbits.com/feeds/security.rss' )
,(u'Tech News' , u'http://db.tidbits.com/feeds/tech.rss' )
,(u'Software Watchlist' , u'http://db.tidbits.com/feeds/watchlist.rss' )
]


@@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
             # first, check if there is an h3 tag which provides a section name
             stag = divtag.find('h3')
             if stag:
-                if stag.parent['class'] == 'dynamic':
+                if stag.parent.get('class', '') == 'dynamic':
                     # a carousel of articles is too complex to extract a section name
                     # for each article, so we'll just call the section "Carousel"
                     section_name = 'Carousel'


@@ -48,7 +48,9 @@ class Resources(Command):
         dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
         if self.newer(dest, files):
             self.info('\tCreating builtin_recipes.xml')
-            open(dest, 'wb').write(serialize_builtin_recipes())
+            xml = serialize_builtin_recipes()
+            with open(dest, 'wb') as f:
+                f.write(xml)

         dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
         files = []


@@ -262,7 +262,6 @@ class Region(object):
             max_lines = max(max_lines, len(c))
         return max_lines

     @property
     def is_small(self):
         return self.line_count < 3
@@ -438,9 +437,8 @@ class Page(object):
         # absorb into a neighboring region (prefer the one with number of cols
         # closer to the avg number of cols in the set, if equal use larger
         # region)
-        # merge contiguous regions that can contain each other
-        '''absorbed = set([])
         found = True
+        absorbed = set([])
         while found:
             found = False
             for i, region in enumerate(self.regions):
@@ -452,10 +450,33 @@ class Page(object):
                         regions.append(self.regions[j])
                     else:
                         break
-                prev = None if i == 0 else i-1
-                next = j if self.regions[j] not in regions else None
-                '''
-        pass
+                prev_region = None if i == 0 else i-1
+                next_region = j if self.regions[j] not in regions else None
+                if prev_region is None and next_region is not None:
+                    absorb_into = next_region
+                elif next_region is None and prev_region is not None:
+                    absorb_into = prev_region
+                elif prev_region is None and next_region is None:
+                    if len(regions) > 1:
+                        absorb_into = regions[0]
+                        regions = regions[1:]
+                    else:
+                        absorb_into = None
+                else:
+                    absorb_into = prev_region
+                    if next_region.line_count >= prev_region.line_count:
+                        avg_column_count = sum([len(r.columns) for r in
+                            regions])/float(len(regions))
+                        if next_region.line_count > prev_region.line_count \
+                           or abs(avg_column_count - len(prev_region.columns)) \
+                           > abs(avg_column_count - len(next_region.columns)):
+                            absorb_into = next_region
+                if absorb_into is not None:
+                    absorb_into.absorb_region(regions)
+                    absorbed.update(regions)
+                    i = j
+        for region in absorbed:
+            self.regions.remove(region)


@@ -72,7 +72,7 @@ class Tokenize:
         return line
     def __compile_expressions(self):
         self.__ms_hex_exp = re.compile(r"\\\'(..)")
-        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
+        self.__utf_exp = re.compile(r"\\u(-?\d{3,6})")
         self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
         self.__par_exp = re.compile(r'\\$')
         self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")


@@ -80,7 +80,7 @@
        <widget class="QLabel" name="label_6">
         <property name="text">
          <string>Regex tips:
-- The default regex - \[[\w]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
+- The default regex - \[[\w ]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
 - A regex pattern of a single dot excludes all genre tags, generating no Genre Section</string>
         </property>
         <property name="wordWrap">


@@ -57,7 +57,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
             setattr(opts,option, fmt_options[option])

     # Fetch and run the plugin for fmt
+    # Returns 0 if successful, 1 if no catalog built
     plugin = plugin_for_catalog_format(fmt)
-    plugin.run(out_file_name, opts, db, notification=notification)
+    return plugin.run(out_file_name, opts, db, notification=notification)


@@ -149,7 +149,7 @@ class DeviceManager(Thread):
                     possibly_connected_devices.append((device, detected_device))
             if possibly_connected_devices:
                 if not self.do_connect(possibly_connected_devices):
-                    print 'Connect to device failed, retying in 5 seconds...'
+                    print 'Connect to device failed, retrying in 5 seconds...'
                     time.sleep(5)
                     if not self.do_connect(possibly_connected_devices):
                         print 'Device connect failed again, giving up'


@@ -594,6 +594,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 self.rating.setValue(int(book.rating))
             if book.tags:
                 self.tags.setText(', '.join(book.tags))
+            if book.series is not None:
+                if self.series.text() is None or self.series.text() == '':
+                    self.series.setText(book.series)
+            if book.series_index is not None:
+                self.series_index.setValue(book.series_index)
         else:
             error_dialog(self, _('Cannot fetch metadata'),
                 _('You must specify at least one of ISBN, Title, '


@@ -903,9 +903,13 @@ class OnDeviceSearch(SearchQueryParser):
                 locations[i] = q[v]
         for i, r in enumerate(self.model.db):
             for loc in locations:
-                if query in loc(r):
-                    matches.add(i)
-                    break
+                try:
+                    if query in loc(r):
+                        matches.add(i)
+                        break
+                except ValueError: # Unicode errors
+                    import traceback
+                    traceback.print_exc()
         return matches


@@ -1394,6 +1394,11 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
         self.status_bar.showMessage(_('Generating %s catalog...')%fmt)

     def catalog_generated(self, job):
+        if job.result:
+            # Search terms nulled catalog results
+            return error_dialog(self, _('No books found'),
+                    _("No books to catalog\nCheck exclude tags"),
+                    show=True)
         if job.failed:
             return self.job_exception(job)
         id = self.library_view.model().add_catalog(job.catalog_file_path, job.catalog_title)


@@ -927,8 +927,16 @@ class EPUB_MOBI(CatalogPlugin):
         for record in data:
             this_title = {}

-            title = this_title['title'] = self.convertHTMLEntities(record['title'])
-            this_title['title_sort'] = self.generateSortTitle(title)
+            this_title['title'] = self.convertHTMLEntities(record['title'])
+            if record['series']:
+                this_title['series'] = record['series']
+                this_title['series_index'] = record['series_index']
+                this_title['title'] = self.generateSeriesTitle(this_title)
+            else:
+                this_title['series'] = None
+                this_title['series_index'] = 0.0
+
+            this_title['title_sort'] = self.generateSortTitle(this_title['title'])
             if 'authors' in record and len(record['authors']):
                 this_title['author'] = " &amp; ".join(record['authors'])
             else:
@@ -984,12 +992,59 @@ class EPUB_MOBI(CatalogPlugin):
     def fetchBooksByAuthor(self):
         # Generate a list of titles sorted by author from the database

+        def author_compare(x,y):
+            # Return -1 if x<y
+            # Return  0 if x==y
+            # Return  1 if x>y
+
+            # Different authors - sort by author_sort
+            if x['author_sort'] > y['author_sort']:
+                return 1
+            elif x['author_sort'] < y['author_sort']:
+                return -1
+            else:
+                # Same author
+                if x['series'] != y['series']:
+                    # Different series
+                    if x['title_sort'].lstrip() > y['title_sort'].lstrip():
+                        return 1
+                    else:
+                        return -1
+                else:
+                    # Same series
+                    if x['series'] == y['series']:
+                        if float(x['series_index']) > float(y['series_index']):
+                            return 1
+                        elif float(x['series_index']) < float(y['series_index']):
+                            return -1
+                        else:
+                            return 0
+                    else:
+                        if x['series'] > y['series']:
+                            return 1
+                        else:
+                            return -1
+
         self.updateProgressFullStep("Sorting database")

-        # Sort titles case-insensitive
+        '''
+        # Sort titles case-insensitive, by author
         self.booksByAuthor = sorted(self.booksByTitle,
                                     key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
+        '''
+        self.booksByAuthor = list(self.booksByTitle)
+        self.booksByAuthor.sort(author_compare)
+
+        if False and self.verbose:
+            self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
+            self.opts.log.info(" %-30s %-20s %s" % ('title', 'title_sort','series', 'series_index'))
+            for title in self.booksByAuthor:
+                self.opts.log.info((u" %-30s %-20s %-20s%5s " % \
+                                    (title['title'][:30],
+                                     title['series'][:20] if title['series'] else '',
+                                     title['series_index'],
+                                     )).encode('utf-8'))
+            raise SystemExit

         # Build the unique_authors set from existing data
         authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
@@ -1063,7 +1118,17 @@ class EPUB_MOBI(CatalogPlugin):
             # Insert the book title
             #<p class="title"><a name="<database_id>"></a><em>Book Title</em></p>
             emTag = Tag(soup, "em")
-            emTag.insert(0, NavigableString(escape(title['title'])))
+            if title['series']:
+                # title<br />series series_index
+                brTag = Tag(soup,'br')
+                title_tokens = title['title'].split(': ')
+                emTag.insert(0, NavigableString(title_tokens[1]))
+                emTag.insert(1, brTag)
+                smallTag = Tag(soup,'small')
+                smallTag.insert(0,NavigableString(title_tokens[0]))
+                emTag.insert(2, smallTag)
+            else:
+                emTag.insert(0, NavigableString(escape(title['title'])))
             titleTag = body.find(attrs={'class':'title'})
             titleTag.insert(0,emTag)
@@ -1073,7 +1138,12 @@ class EPUB_MOBI(CatalogPlugin):
             aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor", self.generateAuthorAnchor(title['author']))
             #aTag.insert(0, escape(title['author']))
             aTag.insert(0, title['author'])
-            authorTag.insert(0, NavigableString("by "))
+            # Insert READ_SYMBOL
+            if title['read']:
+                authorTag.insert(0, NavigableString(self.READ_SYMBOL + "by "))
+            else:
+                authorTag.insert(0, NavigableString(self.NOT_READ_SYMBOL + "by "))
             authorTag.insert(1, aTag)

             '''
@@ -1085,6 +1155,27 @@ class EPUB_MOBI(CatalogPlugin):
             tagsTag.insert(0,emTag)
             '''

+            '''
+            # Insert Series info or remove.
+            seriesTag = body.find(attrs={'class':'series'})
+            if title['series']:
+                # Insert a spacer to match the author indent
+                stc = 0
+                fontTag = Tag(soup,"font")
+                fontTag['style'] = 'color:white;font-size:large'
+                if self.opts.fmt == 'epub':
+                    fontTag['style'] += ';opacity: 0.0'
+                fontTag.insert(0, NavigableString("by "))
+                seriesTag.insert(stc, fontTag)
+                stc += 1
+                if float(title['series_index']) - int(title['series_index']):
+                    series_str = 'Series: %s [%4.2f]' % (title['series'], title['series_index'])
+                else:
+                    series_str = '%s [%d]' % (title['series'], title['series_index'])
+                seriesTag.insert(stc,NavigableString(series_str))
+            else:
+                seriesTag.extract()
+            '''
+
             # Insert linked genres
             if 'tags' in title:
                 tagsTag = body.find(attrs={'class':'tags'})
@@ -1118,7 +1209,12 @@ class EPUB_MOBI(CatalogPlugin):
             else:
                 imgTag['src'] = "../images/thumbnail_default.jpg"
             imgTag['alt'] = "cover"
-            imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT)
+
+            # Tweak image size if we're building for Sony, not sure why this is needed
+            if self.opts.fmt == 'epub' and self.opts.output_profile.startswith("sony"):
+                imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH * 2, self.THUMB_HEIGHT * 2)
+            else:
+                imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT)
             thumbnailTag = body.find(attrs={'class':'thumbnail'})
             thumbnailTag.insert(0,imgTag)
@@ -1310,8 +1406,9 @@ class EPUB_MOBI(CatalogPlugin):
         dtc = 0
         current_letter = ""
         current_author = ""
+        current_series = None

-        # Loop through books_by_author
+        # Loop through booksByAuthor
         book_count = 0
         for book in self.booksByAuthor:
             book_count += 1
@@ -1349,11 +1446,23 @@ class EPUB_MOBI(CatalogPlugin):
                 divTag.insert(dtc,pAuthorTag)
                 dtc += 1

+            # Check for series
+            if book['series'] and book['series'] != current_series:
+                # Start a new series
+                current_series = book['series']
+                pSeriesTag = Tag(soup,'p')
+                pSeriesTag['class'] = "series"
+                pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + book['series']))
+                divTag.insert(dtc,pSeriesTag)
+                dtc += 1
+            if current_series and not book['series']:
+                current_series = None
+
             # Add books
             pBookTag = Tag(soup, "p")
             ptc = 0

-            # Prefix book with read/unread symbol
+            # book with read/unread symbol
             if book['read']:
                 # check mark
                 pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
@@ -1367,7 +1476,11 @@ class EPUB_MOBI(CatalogPlugin):
             aTag = Tag(soup, "a")
             aTag['href'] = "book_%d.html" % (int(float(book['id'])))
-            aTag.insert(0,escape(book['title']))
+            # Use series, series index if avail else just title
+            if current_series:
+                aTag.insert(0,escape(book['title'][len(book['series'])+1:]))
+            else:
+                aTag.insert(0,escape(book['title']))
             pBookTag.insert(ptc, aTag)
             ptc += 1
@@ -1419,6 +1532,7 @@ class EPUB_MOBI(CatalogPlugin):
             divTag.insert(dtc,pIndexTag)
             dtc += 1
             current_author = None
+            current_series = None

             for new_entry in this_months_list:
                 if new_entry['author'] != current_author:
@@ -1435,6 +1549,18 @@ class EPUB_MOBI(CatalogPlugin):
                     divTag.insert(dtc,pAuthorTag)
                     dtc += 1

+                # Check for series
+                if new_entry['series'] and new_entry['series'] != current_series:
+                    # Start a new series
+                    current_series = new_entry['series']
+                    pSeriesTag = Tag(soup,'p')
+                    pSeriesTag['class'] = "series"
+                    pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + new_entry['series']))
+                    divTag.insert(dtc,pSeriesTag)
+                    dtc += 1
+                if current_series and not new_entry['series']:
+                    current_series = None
+
                 # Add books
                 pBookTag = Tag(soup, "p")
                 ptc = 0
@@ -1453,7 +1579,10 @@ class EPUB_MOBI(CatalogPlugin):
                 aTag = Tag(soup, "a")
                 aTag['href'] = "book_%d.html" % (int(float(new_entry['id'])))
-                aTag.insert(0,escape(new_entry['title']))
+                if current_series:
+                    aTag.insert(0,escape(new_entry['title'][len(new_entry['series'])+1:]))
+                else:
+                    aTag.insert(0,escape(new_entry['title']))
                 pBookTag.insert(ptc, aTag)
                 ptc += 1
@@ -1554,6 +1683,7 @@ class EPUB_MOBI(CatalogPlugin):
                     this_book['author_sort'] = book['author_sort']
                     this_book['read'] = book['read']
                     this_book['id'] = book['id']
+                    this_book['series'] = book['series']
                     normalized_tag = self.genre_tags_dict[friendly_tag]
                     genre_tag_list = [key for genre in genre_list for key in genre]
                     if normalized_tag in genre_tag_list:
@@ -1579,7 +1709,9 @@ class EPUB_MOBI(CatalogPlugin):
             for genre in genre_list:
                 for key in genre:
-                    self.opts.log.info(" %s: %d titles" % (key, len(genre[key])))
+                    self.opts.log.info(" %s: %d %s" % (self.getFriendlyGenreTag(key),
+                                       len(genre[key]),
+                                       'titles' if len(genre[key]) > 1 else 'title'))

         # Write the results
         # genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...]
@@ -1786,7 +1918,9 @@ class EPUB_MOBI(CatalogPlugin):
             mtc += 1

         # HTML files - add books to manifest and spine
-        for book in self.booksByTitle:
+        sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \
+                               else self.booksByTitle
+        for book in sort_descriptions_by:
             # manifest
             itemTag = Tag(soup, "item")
             itemTag['href'] = "content/book_%d.html" % int(book['id'])
@@ -1912,7 +2046,9 @@ class EPUB_MOBI(CatalogPlugin):
         nptc += 1

         # Loop over the titles
-        for book in self.booksByTitle:
+        sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \
+                               else self.booksByTitle
+        for book in sort_descriptions_by:
             navPointVolumeTag = Tag(ncx_soup, 'navPoint')
             navPointVolumeTag['class'] = "article"
             navPointVolumeTag['id'] = "book%dID" % int(book['id'])
@@ -1920,7 +2056,11 @@ class EPUB_MOBI(CatalogPlugin):
             self.playOrder += 1
             navLabelTag = Tag(ncx_soup, "navLabel")
             textTag = Tag(ncx_soup, "text")
-            textTag.insert(0, NavigableString(self.formatNCXText(book['title'])))
+            if book['series']:
+                tokens = book['title'].split(': ')
+                textTag.insert(0, NavigableString(self.formatNCXText('%s (%s)' % (tokens[1], tokens[0]))))
+            else:
+                textTag.insert(0, NavigableString(self.formatNCXText(book['title'])))
             navLabelTag.insert(0,textTag)
             navPointVolumeTag.insert(0,navLabelTag)
@@ -2426,15 +2566,25 @@ class EPUB_MOBI(CatalogPlugin):
                 else:
                     yield tag

-        self.opts.log.info(u' %d available genre tags in database (exclude_genre: %s):' % \
+        self.opts.log.info(u' %d genre tags in database (excluding genres matching %s):' % \
             (len(genre_tags_dict), self.opts.exclude_genre))

        # Display friendly/normalized genres
        # friendly => normalized
-        sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
+        if False:
+            sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
             for tag in next_tag(sorted_tags):
                 self.opts.log(u' %s' % tag)
+        else:
+            sorted_tags = ['%s' % (key) for key in sorted(genre_tags_dict.keys())]
+            out_str = ''
+            line_break = 70
+            for tag in next_tag(sorted_tags):
+                out_str += tag
+                if len(out_str) >= line_break:
+                    self.opts.log.info(' %s' % out_str)
+                    out_str = ''
+            self.opts.log.info(' %s' % out_str)

         return genre_tags_dict
@@ -2474,19 +2624,15 @@ class EPUB_MOBI(CatalogPlugin):
             body.insert(btc,aTag)
             btc += 1

-            # Find the first instance of friendly_tag matching genre
-            for friendly_tag in self.genre_tags_dict:
-                if self.genre_tags_dict[friendly_tag] == genre:
-                    break

             titleTag = body.find(attrs={'class':'title'})
-            titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(friendly_tag)))
+            titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(self.getFriendlyGenreTag(genre))))

             # Insert the books by author list
             divTag = body.find(attrs={'class':'authors'})
             dtc = 0
             current_author = ''
+            current_series = None

             for book in books:
                 if book['author'] != current_author:
                     # Start a new author with link
@@ -2502,6 +2648,19 @@ class EPUB_MOBI(CatalogPlugin):
                     divTag.insert(dtc,pAuthorTag)
                     dtc += 1

+                # Check for series
+                if book['series'] and book['series'] != current_series:
+                    # Start a new series
+                    current_series = book['series']
+                    pSeriesTag = Tag(soup,'p')
+                    pSeriesTag['class'] = "series"
+                    pSeriesTag.insert(0,NavigableString(self.NOT_READ_SYMBOL + book['series']))
+                    divTag.insert(dtc,pSeriesTag)
+                    dtc += 1
+                if current_series and not book['series']:
+                    current_series = None
+
                 # Add books
                 pBookTag = Tag(soup, "p")
                 ptc = 0
@@ -2518,7 +2677,11 @@ class EPUB_MOBI(CatalogPlugin):
                 # Add the book title
                 aTag = Tag(soup, "a")
                 aTag['href'] = "book_%d.html" % (int(float(book['id'])))
-                aTag.insert(0,escape(book['title']))
+                # Use series, series index if avail else just title
+                if current_series:
+                    aTag.insert(0,escape(book['title'][len(book['series'])+1:]))
+                else:
+                    aTag.insert(0,escape(book['title']))
                 pBookTag.insert(ptc, aTag)
                 ptc += 1
@@ -2553,6 +2716,7 @@ class EPUB_MOBI(CatalogPlugin):
                     <p class="title"></p>
                     {0}
                     <p class="author"></p>
+                    <!--p class="series"></p-->
                     <p class="tags">&nbsp;</p>
                     <table width="100%" border="0">
                       <tr>
@@ -2678,6 +2842,17 @@ class EPUB_MOBI(CatalogPlugin):
         draw.text((left, top), text, fill=(0,0,0), font=font)
         img.save(open(out_path, 'wb'), 'GIF')

+    def generateSeriesTitle(self, title):
+        if float(title['series_index']) - int(title['series_index']):
+            series_title = '%s %4.2f: %s' % (title['series'],
+                                             title['series_index'],
+                                             title['title'])
+        else:
+            series_title = '%s %d: %s' % (title['series'],
+                                          title['series_index'],
+                                          title['title'])
+        return series_title
+
     def generateShortDescription(self, description):
         # Truncate the description to description_clip, on word boundaries if necessary
         if not description:
@@ -2775,33 +2950,115 @@ class EPUB_MOBI(CatalogPlugin):
             else:
                 return char

+    def getFriendlyGenreTag(self, genre):
+        # Find the first instance of friendly_tag matching genre
+        for friendly_tag in self.genre_tags_dict:
+            if self.genre_tags_dict[friendly_tag] == genre:
+                return friendly_tag
+
     def markdownComments(self, comments):
-        ''' Convert random comment text to normalized, xml-legal block of <p>s'''
-        # reformat illegal xml
-        desc = prepare_string_for_xml(comments)
-
-        # normalize <br/> tags
-        desc = re.sub(r'&lt;br[/]{0,1}&gt;', '<br/>', desc)
-
-        # tokenize double line breaks
-        desc = comments.replace('\r', '')
-        tokens = comments.split('\n\n')
-        soup = BeautifulSoup()
-        ptc = 0
-        for token in tokens:
-            pTag = Tag(soup, 'p')
-            pTag.insert(0,token)
-            soup.insert(ptc, pTag)
-            ptc += 1
-        return soup.renderContents(encoding=None)
+        '''
+        Convert random comment text to normalized, xml-legal block of <p>s
+        'plain text' returns as
+        <p>plain text</p>
+
+        'plain text with <i>minimal</i> <b>markup</b>' returns as
+        <p>plain text with <i>minimal</i> <b>markup</b></p>
+
+        '<p>pre-formatted text</p> returns untouched
+
+        'A line of text\n\nFollowed by a line of text' returns as
+        <p>A line of text</p>
+        <p>Followed by a line of text</p>
+
+        'A line of text.\nA second line of text.\rA third line of text' returns as
+        <p>A line of text.<br />A second line of text.<br />A third line of text.</p>
+
+        '...end of a paragraph.Somehow the break was lost...' returns as
+        <p>...end of a paragraph.</p>
+        <p>Somehow the break was lost...</p>
+
+        Deprecated HTML returns as HTML via BeautifulSoup()
+        '''
+
+        # Explode lost CRs to \n\n
+        # Hackish - ignoring sentences ending or beginning in numbers to avoid
+        # confusion with decimal points.
+        for lost_cr in re.finditer('([a-z])([\.\?!])([A-Z])',comments):
+            comments = comments.replace(lost_cr.group(),
+                                        '%s%s\n\n%s' % (lost_cr.group(1),
+                                                        lost_cr.group(2),
+                                                        lost_cr.group(3)))
+        # Convert \n\n to <p>s
+        if re.search('\n\n', comments):
+            soup = BeautifulSoup()
+            split_ps = comments.split('\n\n')
+            tsc = 0
+            for p in split_ps:
+                pTag = Tag(soup,'p')
+                pTag.insert(0,p)
+                soup.insert(tsc,pTag)
+                tsc += 1
+            comments = soup.renderContents()
+
+        # Convert solo returns to <br />
+        comments = re.sub('[\r\n]','<br />', comments)
+
+        soup = BeautifulSoup(comments)
+        result = BeautifulSoup()
+        rtc = 0
+        open_pTag = False
+
+        all_tokens = list(soup.contents)
+        for token in all_tokens:
+            if type(token) is NavigableString:
+                if not open_pTag:
+                    pTag = Tag(result,'p')
+                    open_pTag = True
+                    ptc = 0
+                pTag.insert(ptc,prepare_string_for_xml(token))
+                ptc += 1
+            elif token.name in ['br','b','i']:
+                if not open_pTag:
+                    pTag = Tag(result,'p')
+                    open_pTag = True
+                    ptc = 0
+                pTag.insert(ptc, token)
+                ptc += 1
+            else:
+                if open_pTag:
+                    result.insert(rtc, pTag)
+                    rtc += 1
+                    open_pTag = False
+                    ptc = 0
+                # Clean up NavigableStrings for xml
+                sub_tokens = list(token.contents)
+                for sub_token in sub_tokens:
+                    if type(sub_token) is NavigableString:
+                        sub_token.replaceWith(prepare_string_for_xml(sub_token))
+                result.insert(rtc, token)
+                rtc += 1
+
+        if open_pTag:
+            result.insert(rtc, pTag)
+
+        paras = result.findAll('p')
+        for p in paras:
+            p['class'] = 'description'
+
+        return result.renderContents(encoding=None)

     def processSpecialTags(self, tags, this_title, opts):
         tag_list = []
         for tag in tags:
             tag = self.convertHTMLEntities(tag)
             if tag.startswith(opts.note_tag):
-                this_title['notes'] = tag[1:]
+                this_title['notes'] = tag[len(self.opts.note_tag):]
             elif tag == opts.read_tag:
                 this_title['read'] = True
             elif re.search(opts.exclude_genre, tag):
@@ -2847,6 +3104,8 @@ class EPUB_MOBI(CatalogPlugin):
         opts.basename = "Catalog"
         opts.plugin_path = self.plugin_path
         opts.cli_environment = not hasattr(opts,'sync')
+        # GwR *** hardwired to sort by author, could be an option if passed in opts
+        opts.sort_descriptions_by_author = True

         if opts.verbose:
             opts_dict = vars(opts)
@@ -2855,15 +3114,30 @@ class EPUB_MOBI(CatalogPlugin):
                 'CLI' if opts.cli_environment else 'GUI'))
             if opts_dict['ids']:
                 log(" Book count: %d" % len(opts_dict['ids']))

+            sections_list = ['Descriptions','Authors']
+            if opts.generate_titles:
+                sections_list.append('Titles')
+            if opts.generate_recently_added:
+                sections_list.append('Recently Added')
+            if not opts.exclude_genre.strip() == '.':
+                sections_list.append('Genres')
+            log(u"Creating Sections for %s" % ', '.join(sections_list))
+
+            # If exclude_genre is blank, assume user wants all genre tags included
+            if opts.exclude_genre.strip() == '':
+                opts.exclude_genre = '\[^.\]'
+                log(" converting empty exclude_genre to '\[^.\]'")
+
             # Display opts
             keys = opts_dict.keys()
             keys.sort()
             log(" opts:")
             for key in keys:
-                if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles',
-                           'generate_recently_added','note_tag','numbers_as_text','read_tag',
-                           'search_text','sort_by','sync']:
+                if key in ['catalog_title','exclude_genre','exclude_tags',
+                           'note_tag','numbers_as_text','read_tag',
+                           'search_text','sort_by','sort_descriptions_by_author','sync']:
                     log(" %s: %s" % (key, opts_dict[key]))

         # Launch the Catalog builder


@@ -62,7 +62,7 @@ How do I convert my file containing non-English characters, or smart quotes?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 There are two aspects to this problem:
   1. Knowing the encoding of the source file: |app| tries to guess what character encoding your source files use, but often, this is impossible, so you need to tell it what encoding to use. This can be done in the GUI via the :guilabel:`Input character encoding` field in the :guilabel:`Look & Feel` section. The command-line tools all have an :option:`--input-encoding` option.
-  2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first.
+  2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first. Note that when converting HTML files, leave the input encoding setting mentioned above blank. This is because the HTML2ZIP plugin automatically converts the HTML files to a standard encoding (utf-8).
   3. Embedding fonts: If you are generating an LRF file to read on your SONY Reader, you are limited by the fact that the Reader only supports a few non-English characters in the fonts it comes pre-loaded with. You can work around this problem by embedding a unicode-aware font that supports the character set your file uses into the LRF file. You should embed atleast a serif and a sans-serif font. Be aware that embedding fonts significantly slows down page-turn speed on the reader.


@@ -615,10 +615,12 @@ class BasicNewsRecipe(Recipe):
             del o['onload']

         for script in list(soup.findAll('noscript')):
             script.extract()
         for attr in self.remove_attributes:
             for x in soup.findAll(attrs={attr:True}):
                 del x[attr]
+        for base in list(soup.findAll('base')):
+            base.extract()

         return self.postprocess_html(soup, first_fetch)