Sync to trunk.

2025-07-07 10:14:46 -04:00 · 2010-01-21 17:13:09 -05:00 · 2010-01-21 17:13:09 -05:00 · a0d1670e6f
commit a0d1670e6f
parent 4b3f998e9c 24a6d43b91
67 changed files with 2577 additions and 486 deletions
--- a/resources/images/news/ad.png
+++ b/resources/images/news/ad.png
--- a/resources/images/news/digitaljournal.png
+++ b/resources/images/news/digitaljournal.png
--- a/resources/images/news/kitsapun.png
+++ b/resources/images/news/kitsapun.png
--- a/resources/images/news/ledevoir.png
+++ b/resources/images/news/ledevoir.png
--- a/resources/recipes/ad.recipe
+++ b/resources/recipes/ad.recipe
@ -0,0 +1,86 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class ADRecipe(BasicNewsRecipe):
    # News recipe for AD (ad.nl): reads the RSS feeds listed below and
    # downloads the print rendition of each article.
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    country = 'NL'
    version = 1

    title = u'AD'
    publisher = u'de Persgroep Publishing Nederland NV'
    category = u'News, Sports, the Netherlands'
    description = u'News and Sports from the Netherlands'

    oldest_article = 1.2
    max_articles_per_feed = 100
    use_embedded_content = False

    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # Only the article box and its footnote paragraph are kept.
    keep_only_tags = [
        dict(name = 'div', attrs = {'id': 'art_box2'}),
        dict(name = 'p', attrs = {'class': 'gen_footnote3'}),
    ]

    # Layout-only helper divs are dropped from the kept content.
    remove_tags = [
        dict(name = 'div', attrs = {'class': 'gen_clear'}),
        dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')}),
    ]

    remove_attributes = ['style']

    # feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
    feeds = [
        (u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'),
        (u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'),
        (u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'),
        (u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'),
        (u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'),
        (u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'),
        (u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'),
        (u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'),
        (u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'),
        (u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'),
        (u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'),
        (u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'),
        (u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'),
        (u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'),
        (u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'),
        (u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'),
        (u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'),
        (u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'),
        (u'Consument', u'http://www.ad.nl/consument/rss.xml'),
        (u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'),
        (u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'),
        (u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'),
        (u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'),
        (u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'),
        (u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'),
    ]

    extra_css = '''
                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
                div.captionEmbeddedMasterObject {font-size: x-small; font-style: italic; color: #696969;}
                .gen_footnote3 {font-size: small; color: #666666; margin-top: 0.6em;}
                '''

    # The language option now follows the recipe's own `language` attribute
    # instead of a hard-coded 'en', so the metadata agrees with the content.
    conversion_options = {'comments': description, 'tags': category, 'language': language,
                          'publisher': publisher}

    def print_version(self, url):
        """Return the print-page URL built from the segments of the article URL.

        The URL is split on '/' and segments 2-5, 10, 7, 8, 9 and 13 are
        re-assembled around a literal 'print' segment. Raises IndexError
        when the URL has fewer than 14 '/'-separated segments.
        """
        parts = url.split('/')
        segments = [parts[2], parts[3], parts[4], parts[5], parts[10], parts[7],
                    'print', parts[8], parts[9], parts[13]]
        return 'http://' + '/'.join(segments)

    def preprocess_html(self, soup):
        """Remove every <br> whose neighbouring sibling tags on both sides are <br>.

        Returns the same soup object after the extraction.
        """
        for br in soup.findAll('br'):
            previous_tag = br.findPreviousSibling(True)
            if hasattr(previous_tag, 'name') and previous_tag.name == 'br':
                # `following_tag` avoids shadowing the builtin `next`.
                following_tag = br.findNextSibling(True)
                if hasattr(following_tag, 'name') and following_tag.name == 'br':
                    br.extract()
        return soup
--- a/resources/recipes/amspec.recipe
+++ b/resources/recipes/amspec.recipe
@ -1,7 +1,5 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 spectator.org
 '''
@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheAmericanSpectator(BasicNewsRecipe):
    title                 = 'The American Spectator'
    __author__            = 'Darko Miletic'
    language = 'en'
    description           = 'News from USA'
    category              = 'news, politics, USA, world'
    publisher             = 'The American Spectator'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    language              = 'en'
    INDEX                 = 'http://spectator.org'
-    html2lrf_options = [
+    conversion_options = {  
-                             '--comment'       , description
+                             'comments'        : description
-                           , '--category'      , 'news, politics, USA'
+                            ,'tags'            : category
-                           , '--publisher'     , title
+                            ,'language'        : language
-                         ]
+                            ,'publisher'       : publisher
                         }
    keep_only_tags   = [
                             dict(name='div', attrs={'class':'post inner'})
@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
    remove_tags     = [
                             dict(name='object')
-                            ,dict(name='div', attrs={'class':'col3'         })
+                            ,dict(name='div', attrs={'class':['col3','post-options','social']})
-                            ,dict(name='div', attrs={'class':'post-options' })
+                            ,dict(name='p'  , attrs={'class':['letter-editor','meta']})
                            ,dict(name='p'  , attrs={'class':'letter-editor'})
                            ,dict(name='div', attrs={'class':'social'       })
                        ]
-    feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
+    feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
    def get_cover_url(self):
        cover_url = None
@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
    def print_version(self, url):
        return url + '/print'
    def get_article_url(self, article):
        return article.get('guid', None)
--- a/resources/recipes/bbc_fast.recipe
+++ b/resources/recipes/bbc_fast.recipe
@ -0,0 +1,60 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 news.bbc.co.uk
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class BBC(BasicNewsRecipe):
    # BBC News recipe that fetches the print rendition of each article.
    title = 'BBC News (fast)'
    __author__ = 'Darko Miletic'
    description = 'News from UK. A much faster version that does not download pictures'
    publisher = 'BBC'
    category = 'news, UK, world'
    language = 'en'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    #delay                  = 1

    extra_css = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } '

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Content is trimmed to the span between the headline and the footer div.
    remove_tags_before = dict(name='div',attrs={'class':'headline'})
    remove_tags_after = dict(name='div', attrs={'class':'footer'})
    remove_tags = [
        dict(name=['object','link','script','iframe']),
        dict(name='div', attrs={'class':'footer'}),
    ]

    feeds = [
        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
    ]

    def print_version(self, url):
        """Return the newsvote print-tool URL for `url`.

        Everything after the first 'http://' in `url` is appended to the
        print-tool prefix.
        """
        remainder = url.partition('http://')[2]
        return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + remainder

    def get_article_url(self, article):
        """Use the feed entry's 'guid' as the article URL (None when absent)."""
        return article.get('guid', None)
--- a/resources/recipes/calgary_herald.recipe
+++ b/resources/recipes/calgary_herald.recipe
@ -0,0 +1,121 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    # Recipe that scrapes the "today's paper" index page of a CanWest
    # newspaper site. Active configuration: Calgary Herald. To build one of
    # the other papers, comment out the three lines below and un-comment one
    # of the groups that follow.
    title = u'Calgary Herald'
    url_prefix = 'http://www.calgaryherald.com'
    description = u'News from Calgary, AB'
    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'
    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'
    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def preprocess_html(self,soup):
        """Strip empty `id` attributes from <div> tags and return the soup.

        Empty ids are removed because they break the generated table of
        contents (the cause is not known).
        """
        for div in soup.findAll('div',attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of (section title, list of article dicts) tuples in
        the order the sections were first seen; sections without articles
        are left out. Articles found before any section heading are filed
        under 'News'.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Walk section-title and article divs in document order; a section
        # title div switches the section that later article divs are filed under.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # A title seen twice must not produce the same section twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(
                dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/cjr.recipe
+++ b/resources/recipes/cjr.recipe
@ -0,0 +1,15 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class CJR(BasicNewsRecipe):
    # Columbia Journalism Review: one RSS feed, articles fetched through the
    # site's single-page print view.
    title = u'Columbia Journalism Review'
    __author__ = u'Xanthan Gum'
    description = 'News about journalism.'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
        (u'News Stories', u'http://www.cjr.org/index.xml'),
    ]

    def print_version(self, url):
        """Return `url` with the all-pages print query string appended."""
        print_query = '?page=all&print=true'
        return url + print_query
--- a/resources/recipes/digitaljournal.recipe
+++ b/resources/recipes/digitaljournal.recipe
@ -0,0 +1,52 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 digitaljournal.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class DigitalJournal(BasicNewsRecipe):
    # Digital Journal recipe: section RSS feeds, articles fetched from the
    # site's /print/ pages.
    title = 'Digital Journal'
    __author__ = 'Darko Miletic'
    description = 'A Global Citizen Journalism News Network'
    category = 'news, politics, USA, world'
    publisher = 'Digital Journal'
    language = 'en'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [dict(name='div', attrs={'class':['article','body']})]
    remove_tags = [dict(name=['object','table'])]

    feeds = [
        (u'Latest News', u'http://digitaljournal.com/rss/?feed=latest_news'),
        (u'Business', u'http://digitaljournal.com/rss/?feed=top_news&depname=Business'),
        (u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment'),
        (u'Environment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment'),
        (u'Food', u'http://digitaljournal.com/rss/?feed=top_news&depname=Food'),
        (u'Health', u'http://digitaljournal.com/rss/?feed=top_news&depname=Health'),
        (u'Internet', u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet'),
        (u'Politics', u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics'),
        (u'Religion', u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion'),
        (u'Science', u'http://digitaljournal.com/rss/?feed=top_news&depname=Science'),
        (u'Sports', u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports'),
        (u'Technology', u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology'),
        (u'World', u'http://digitaljournal.com/rss/?feed=top_news&depname=World'),
        (u'Arts', u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts'),
    ]

    def print_version(self, url):
        """Return `url` with 'print/' inserted after every 'digitaljournal.com/'."""
        site_root = 'digitaljournal.com/'
        print_root = 'digitaljournal.com/print/'
        return url.replace(site_root, print_root)
--- a/resources/recipes/edmonton_journal.recipe
+++ b/resources/recipes/edmonton_journal.recipe
@ -0,0 +1,126 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    # Recipe that scrapes the "today's paper" index page of a CanWest
    # newspaper site. Active configuration: Edmonton Journal. To build one of
    # the other papers, comment out the three lines below and un-comment one
    # of the groups that follow.
    title = u'Edmonton Journal'
    url_prefix = 'http://www.edmontonjournal.com'
    description = u'News from Edmonton, AB'
    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'
    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'
    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'
    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def preprocess_html(self,soup):
        """Strip empty `id` attributes from <div> tags and return the soup.

        Empty ids are removed because they break the generated table of
        contents (the cause is not known).
        """
        for div in soup.findAll('div',attrs={'id':''}):
            del div['id']
        return soup

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of (section title, list of article dicts) tuples in
        the order the sections were first seen; sections without articles
        are left out. Articles found before any section heading are filed
        under 'News'.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Walk section-title and article divs in document order; a section
        # title div switches the section that later article divs are filed under.
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                # A title seen twice must not produce the same section twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(
                dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/ftd.recipe
+++ b/resources/recipes/ftd.recipe
@ -9,27 +9,33 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class FTDe(BasicNewsRecipe):
-
+    
    title = 'FTD'
    description = 'Financial Times Deutschland'
    __author__ = 'Oliver Niesner'
    use_embedded_content   = False
    timefmt = ' [%d %b %Y]'
-    language = 'de'
+    language = _('German')
    max_articles_per_feed = 40
    no_stylesheets = True
-
+    
    remove_tags = [dict(id='navi_top'),
 		   dict(id='topbanner'),
 		   dict(id='seitenkopf'),
 		   dict(id='BoxA-0-0-0'),
 		   #dict(id='BoxA-2-0-0'),
 		   dict(id='footer'),
 		   dict(id='rating_open'),
 		   dict(id='ADS_Top'),
 		   dict(id='spinner'),
 		   dict(id='ftd-contentad'),
 		   dict(id='ftd-promo'),
 		   dict(id='nava-50009007-1-0'),
 		   dict(id='navli-50009007-1-0'),
 		   dict(id='Box5000534-0-0-0'),
 		   dict(id='ExpV-1-0-0-1'),
 		   dict(id='ExpV-1-0-0-0'),
 		   dict(id='PollExpV-2-0-0-0'),
 		   dict(id='starRating'),
 		   dict(id='saveRating'),
 		   dict(id='yLayer'),
@ -44,14 +50,20 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='ul', attrs={'class':'nav'}),
 		   dict(name='p', attrs={'class':'articleOptionHead'}),
 		   dict(name='p', attrs={'class':'articleOptionFoot'}),
 		   dict(name='p', attrs={'class':'moreInfo'}),
 		   dict(name='div', attrs={'class':'chartBox'}),
 		   dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
 		   dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
-		   dict(name='div', attrs={'class':'box boxNavTabs '}),
+		   dict(name='div', attrs={'class':'box boxNavTabs'}),
 		   dict(name='div', attrs={'class':'boxMMRgtLow'}),
 		   dict(name='span', attrs={'class':'vote_455857'}),
 		   dict(name='div', attrs={'class':'relatedhalb'}),
 		   dict(name='div', attrs={'class':'box boxListScrollOutline'}),
 		   dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
 		   dict(name='div', attrs={'class':'box boxTeaser boxPhotoshow boxImgWide'}),
 		   dict(name='div', attrs={'class':'box boxTeaser'}),
 		   dict(name='div', attrs={'class':'tagCloud'}),
 		   dict(name='div', attrs={'class':'pollView'}),
 		   dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
 		   dict(name='div', attrs={'class':'ftdHpNav'}),
 		   dict(name='div', attrs={'class':'ftdHead'}),
@ -67,11 +79,12 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='div', attrs={'class':'wertungoben'}),
 		   dict(name='div', attrs={'class':'artikelfuss'}),
 		   dict(name='a', attrs={'class':'rating'}),
 		   dict(name='a', attrs={'href':'#rt'}),
 		   dict(name='div', attrs={'class':'articleOptionFootFrame'}),
 		   dict(name='div', attrs={'class':'artikelsplitfaq'})]
-    remove_tags_after = [dict(name='a', attrs={'class':'more'})]
+    #remove_tags_after = [dict(name='a', attrs={'class':'more'})]
-
+    
-    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
+    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), 
 	       ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
 	       ('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
 	       ('Politik', 'http://www.ftd.de/rss2/politik'),
@ -82,8 +95,8 @@ class FTDe(BasicNewsRecipe):
 	       ('Auto', 'http://www.ftd.de/rss2/auto'),
 	       ('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
-	     ]
+	     ] 
-
+    
    def print_version(self, url):
-        return url + '?mode=print'
+        return url.replace('.html', '.html?mode=print')
--- a/resources/recipes/globe_and_mail.recipe
+++ b/resources/recipes/globe_and_mail.recipe
@ -32,7 +32,7 @@ class GlobeAndMail(BasicNewsRecipe):
 		'gallery-controls', 'video', 'galleryLoading','deck','header',
        'toolsBottom'] },
 		{'class':['credit','inline-img-caption','tab-pointer'] },
-		dict(name='div', attrs={'id':'lead-photo'}),
+		dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
 		dict(name='div', attrs={'class':'right'}),
 		dict(name='div', attrs={'id':'footer'}),
 		dict(name='div', attrs={'id':'beta-msg'}),
--- a/resources/recipes/kitsapun.recipe
+++ b/resources/recipes/kitsapun.recipe
@ -0,0 +1,44 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kitsapsun.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Kitsapsun(BasicNewsRecipe):
    # Kitsap Sun recipe: headline RSS feeds, articles fetched with the
    # '?print=1' view of each story.
    title = 'Kitsap Sun'
    __author__ = 'Darko Miletic'
    description = 'News from Kitsap County'
    publisher = 'Scripps Interactive Newspapers Group'
    category = 'news, Kitsap county, USA'
    language = 'en'
    encoding = 'cp1252'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [dict(name='div', attrs={'id':['story_meta','story_content']})]
    remove_tags = [dict(name=['object','link','embed','form','iframe'])]

    feeds = [
        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/'),
    ]

    def print_version(self, url):
        """Return the print URL: everything before the last '/' plus '/?print=1'."""
        base, _sep, _tail = url.rpartition('/')
        return base + '/?print=1'
--- a/resources/recipes/ledevoir.recipe
+++ b/resources/recipes/ledevoir.recipe
@ -1,79 +1,79 @@
-#!/usr/bin/env  python
+#!/usr/bin/env  python
-__license__   = 'GPL v3'
+__license__   = 'GPL v3'
-__author__    = 'Lorenzo Vigentini'
+__author__    = 'Lorenzo Vigentini'
-__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
-__version__     = 'v1.01'
+__version__     = 'v1.01'
-__date__        = '14, January 2010'
+__date__        = '14, January 2010'
-__description__   = 'Canadian Paper '
+__description__   = 'Canadian Paper '
-
+
-'''
+'''
-http://www.ledevoir.com/
+http://www.ledevoir.com/
-'''
+'''
-
+
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe
-
+
-class ledevoir(BasicNewsRecipe):
+class ledevoir(BasicNewsRecipe):
-    author        = 'Lorenzo Vigentini'
+    author        = 'Lorenzo Vigentini'
-    description   = 'Canadian Paper'
+    description   = 'Canadian Paper'
-
+
-    cover_url      = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
+    cover_url      = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
-    title          = u'Le Devoir'
+    title          = u'Le Devoir'
-    publisher      = 'leDevoir.com'
+    publisher      = 'leDevoir.com'
-    category       = 'News, finance, economy, politics'
+    category       = 'News, finance, economy, politics'
-
+
-    language       = 'fr'
+    language       = 'fr'
-    encoding       = 'utf-8'
+    encoding       = 'utf-8'
-    timefmt        = '[%a, %d %b, %Y]'
+    timefmt        = '[%a, %d %b, %Y]'
-
+
-    max_articles_per_feed = 50
+    max_articles_per_feed = 50
-    use_embedded_content  = False
+    use_embedded_content  = False
-    recursion             = 10
+    recursion             = 10
-
+
-    remove_javascript     = True
+    remove_javascript     = True
-    no_stylesheets        = True
+    no_stylesheets        = True
-
+
-    keep_only_tags  = [
+    keep_only_tags  = [
-                        dict(name='div', attrs={'id':'article'}),
+                        dict(name='div', attrs={'id':'article'}),
-                        dict(name='ul', attrs={'id':'ariane'})
+                        dict(name='ul', attrs={'id':'ariane'})
-                    ]
+                    ]
-
+
-    remove_tags     = [
+    remove_tags     = [
-                        dict(name='div', attrs={'id':'dialog'}),
+                        dict(name='div', attrs={'id':'dialog'}),
-                        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
+                        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
-                        dict(name='ul', attrs={'class':'mots_cles'}),
+                        dict(name='ul', attrs={'class':'mots_cles'}),
-                        dict(name='a', attrs={'class':'haut'}),
+                        dict(name='a', attrs={'class':'haut'}),
-                        dict(name='h5', attrs={'class':'interesse_actions'})
+                        dict(name='h5', attrs={'class':'interesse_actions'})
-                    ]
+                    ]
-
+
-    feeds          = [
+    feeds          = [
-                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
+                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
-                       (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
+                       (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
-                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
+                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
-                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
+                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
-                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
+                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
-                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
+                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
-                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
+                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
-                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
+                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
-                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
+                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
-                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
+                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
-                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
+                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
-                       (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
+                       (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
-                     ]
+                     ]
-
+
-    extra_css = '''
+    extra_css = '''
-                h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
+                h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
-                h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
+                h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
-                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
-                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
-                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
-                .specs {line-height:1em;margin:1px 0;}
+                .specs {line-height:1em;margin:1px 0;}
-                .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
+                .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
-                .specs span.auteur a,
+                .specs span.auteur a,
-                .specs span.auteur span {text-transform:uppercase;color:#787878;}
+                .specs span.auteur span {text-transform:uppercase;color:#787878;}
-                .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
+                .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
-                ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
+                ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
-                ul#ariane li {display:inline;}
+                ul#ariane li {display:inline;}
-                ul#ariane a {color:#2E2E2E;text-decoration:underline;}
+                ul#ariane a {color:#2E2E2E;text-decoration:underline;}
-                .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
+                .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
-                .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
+                .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
-                '''
+                '''
--- a/resources/recipes/montreal_gazette.recipe
+++ b/resources/recipes/montreal_gazette.recipe
@ -0,0 +1,96 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    # Recipe for a canada.com newspaper, built from the site's
    # "today's paper" index page. This copy is configured for the
    # Montreal Gazette.
    title = u'Montreal Gazette'
    url_prefix = 'http://www.montrealgazette.com'
    description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]
    def preprocess_html(self, soup):
        """Delete empty ``id`` attributes from every <div> and return the soup."""
        # Empty ids were observed to break the generated TOC (cause unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup
    def parse_index(self):
        """Return a list of (section title, list of article dicts) from today's paper."""
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
        articles = {}
        # Articles seen before the first section heading are filed under 'News'.
        key = 'News'
        sections = [key]
        # Section headings (class "section_title02") and article blocks
        # (class "featurecontent") are visited in document order.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains a section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated title must not be listed twice, otherwise its
                # articles would be emitted twice in the result.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data; the headline link is mandatory
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag, False)
            # The index page supplies no publication date.
            pubdate = ''
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))
        # Keep page order and drop sections that ended up without articles.
        return [(name, articles[name]) for name in sections if name in articles]
--- a/resources/recipes/ottawa_citizen.recipe
+++ b/resources/recipes/ottawa_citizen.recipe
@ -0,0 +1,101 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    # Recipe for a canada.com newspaper, built from the site's
    # "today's paper" index page. This copy is configured for the
    # Ottawa Citizen.
    title = u'Ottawa Citizen'
    url_prefix = 'http://www.ottawacitizen.com'
    description = u'News from Ottawa, ON'
    # To build the Montreal Gazette instead, use these three settings:
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]
    def preprocess_html(self, soup):
        """Delete empty ``id`` attributes from every <div> and return the soup."""
        # Empty ids were observed to break the generated TOC (cause unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup
    def parse_index(self):
        """Return a list of (section title, list of article dicts) from today's paper."""
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
        articles = {}
        # Articles seen before the first section heading are filed under 'News'.
        key = 'News'
        sections = [key]
        # Section headings (class "section_title02") and article blocks
        # (class "featurecontent") are visited in document order.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains a section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated title must not be listed twice, otherwise its
                # articles would be emitted twice in the result.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data; the headline link is mandatory
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag, False)
            # The index page supplies no publication date.
            pubdate = ''
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))
        # Keep page order and drop sections that ended up without articles.
        return [(name, articles[name]) for name in sections if name in articles]
--- a/resources/recipes/pajama.recipe
+++ b/resources/recipes/pajama.recipe
@ -0,0 +1,48 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class PajamasMedia(BasicNewsRecipe):
    # Recipe for Pajamas Media: reads the FeedBurner feed and reduces each
    # downloaded page to its 'innerpage-content' block.
    title          = u'Pajamas Media'
    description = u'Provides exclusive news and opinion for forty countries.'
    language       = 'en'
    __author__     = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    recursions = 1
    match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$']
    # NOTE(review): sibling recipes spell this option `no_stylesheets`; this
    # name may be ignored by BasicNewsRecipe -- confirm before renaming, since
    # a rename would change the generated output.
    remove_stylesheets = True
    remove_tags_after  = dict(name='div', attrs={'class':'paged-nav'})
    remove_tags = [
       dict(name='iframe'),
       dict(name='div', attrs={'class':['pages']}),
    ]
    feeds          = [
        ('pajamas Media', 'http://feeds.feedburner.com/PajamasMedia'),
    ]
    def preprocess_html(self, soup):
        """Return a minimal document whose body holds only the story block."""
        story = soup.find(name='div', attrs={'id':'innerpage-content'})
        if story is None:
            # No story container on this page: inserting None into the new
            # body would raise, so hand the page back unchanged instead.
            return soup
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
    def postprocess_html(self, soup, first):
        """Remove the page header and author elements when *first* is false."""
        if not first:
            for css_class in ('innerpage-header', 'author'):
                tag = soup.find(attrs={'class': css_class})
                if tag:
                    tag.extract()
        return soup
--- a/resources/recipes/physics_today.recipe
+++ b/resources/recipes/physics_today.recipe
@ -8,8 +8,7 @@ class Physicstoday(BasicNewsRecipe):
    description           = u'Physics Today magazine'
    publisher             = 'American Institute of Physics'
    category              = 'Physics'
-    language = 'en'
+    language              = 'en'
    cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
    oldest_article = 30
    max_articles_per_feed = 100
@ -30,11 +29,11 @@ class Physicstoday(BasicNewsRecipe):
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
-            br.open('http://www.physicstoday.org/pt/sso_login.jsp')
+            br.open('http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
-            br.select_form(name='login')
+            br.select_form(name='login_form')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
-    feeds          = [(u'All', u'http://www.physicstoday.org/feed.xml')]
+    feeds          = [(u'All', u'http://www.physicstoday.org/feed.xml')]
--- a/resources/recipes/readers_digest.recipe
+++ b/resources/recipes/readers_digest.recipe
@ -0,0 +1,188 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.web.feeds import Feed
 class ReadersDigest(BasicNewsRecipe):
    # Recipe for Reader's Digest (rd.com): sections scraped from channel
    # pages are combined with the RSS feeds listed in `feeds`, and articles
    # are downloaded through the URL built by print_version().
    title       = 'Readers Digest'
    __author__  = 'BrianG'
    language = 'en'
    description = 'Readers Digest Feeds'
    no_stylesheets        = True
    use_embedded_content  = False
    oldest_article = 60
    max_articles_per_feed = 200
    remove_javascript     = True
    extra_css      = ''' h1 {font-family:georgia,serif;color:#000000;}
                        .mainHd{font-family:georgia,serif;color:#000000;}
                         h2 {font-family:Arial,Sans-serif;}
                        .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
                        .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
                        .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
                        .photoBkt{ font-size:x-small ;}
                        .vertPhoto{font-size:x-small ;}
                        .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
                        .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
                        .artTxt{font-family:georgia,serif;}
                        .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
                        .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
                        a:link{color:#CC0000;}
                        .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
                        '''
    remove_tags = [
        dict(name='h4', attrs={'class':'close'}),
        dict(name='div', attrs={'class':'fromLine'}),
        dict(name='img', attrs={'class':'colorTag'}),
        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
        dict(name='div', attrs={'class':'horizontalAd'}),
        dict(name='div', attrs={'id':'imageCounterLeft'}),
        dict(name='div', attrs={'id':'commentsPrint'})
        ]
    feeds = [
            ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
            ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
            ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
            ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
        ]
    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
 #-------------------------------------------------------------------------------------------------
    def print_version(self, url):
        """Return the print-page URL for an article or blog-post URL."""
        if url.find('/article') > 0:
            # Get the identity number of the current article and append it to the root print URL.
            # NOTE(review): the slice bounds assume a specific rd.com URL
            # layout ('/article' ... '.html?'); confirm against live URLs.
            ident = url[url.find('/article')+8:url.find('.html?')-4]
            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
        elif url.find('/post') > 0:
            # In this case the page itself has to be fetched to derive the print page.
            soup = self.index_to_soup(url)
            print_block = soup.find('ul', attrs={'class':'printBlock'})
            links = print_block('a', href=True) if print_block is not None else []
            if links:
                url = 'http://www.rd.com' + links[0]['href']
                # Cut at '&Keep' only when present; slicing with find()'s -1
                # would silently drop the last character of the URL.
                url = url.partition('&Keep')[0]
            # Without a print link the original post URL is returned as is.
        return url
 #-------------------------------------------------------------------------------------------------
    def parse_index(self):
        """Return a list of (section title, article dicts): scraped pages plus RSS feeds."""
        pages = [
                ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
                # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
                ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
            ]
        feeds = []
        for section, url, divider, attrList in pages:
            feeds.append((section, self.page_parse(url, divider, attrList)))
        # After the site pages, add the RSS feeds. parse_rss returns Feed
        # objects, so each one is converted to the (title, dicts) form used here.
        for feed in self.parse_rss():
            newArticles = [
                {
                    'title' : article.title,
                    'url'   : article.url,
                    'date'  : article.date,
                    'description' : article.text_summary
                }
                for article in feed.articles
            ]
            # New and Blogs should be the first two feeds.
            if feed.title == 'New in RD':
                feeds.insert(0,(feed.title,newArticles))
            elif feed.title == 'Blogs':
                feeds.insert(1,(feed.title,newArticles))
            else:
                feeds.append((feed.title,newArticles))
        return feeds
 #-------------------------------------------------------------------------------------------------
    def page_parse(self, mainurl, divider, attrList):
        """Return article dicts for every element of *mainurl* matching *attrList*."""
        # `divider` is accepted for interface compatibility but is not used.
        articles = []
        mainsoup = self.index_to_soup(mainurl)
        for item in mainsoup.findAll(attrs=attrList):
            images = item('img', alt=True)
            links = item('a', href=True)
            if not images or not links:
                # Title comes from the image alt text and URL from the link;
                # an element lacking either cannot describe an article.
                continue
            articles.append({
                        'title' : images[0]['alt'],
                        'url'   : 'http://www.rd.com'+links[0]['href'],
                        'date'  : '',
                        'description' : ''
                    })
        return articles
 #-------------------------------------------------------------------------------------------------
    def parse_rss (self):
        """Parse the RSS feeds and move 'recipe' articles into a separate 'Recipes' feed."""
        # Do the "official" parse_feeds first
        feeds = BasicNewsRecipe.parse_feeds(self)
        # Pull every article with "recipe" in its title out of its feed.
        recipeArticles = []
        for curfeed in feeds:
            kept = []
            for curarticle in curfeed.articles:
                if 'RECIPE' in curarticle.title.upper():
                    recipeArticles.append(curarticle)
                else:
                    kept.append(curarticle)
            # Update the existing list object in place.
            curfeed.articles[:] = kept
        # If any recipes were found, create a new Feed object and append it
        # to the "official" list of feeds.
        if recipeArticles:
            pfeed = Feed()
            pfeed.title = 'Recipes'
            pfeed.description = 'Recipe Feed (Virtual)'
            pfeed.image_url  = None
            pfeed.oldest_article = 30
            pfeed.id_counter = len(recipeArticles)
            pfeed.articles = recipeArticles[:]
            feeds.append(pfeed)
        return feeds
--- a/resources/recipes/regina_leader_post.recipe
+++ b/resources/recipes/regina_leader_post.recipe
@ -0,0 +1,116 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    # Recipe for a canada.com newspaper, built from the site's
    # "today's paper" index page. This copy is configured for the
    # Regina Leader-Post.
    title = u'Regina Leader-Post'
    url_prefix = 'http://www.leaderpost.com'
    description = u'News from Regina, SK'
    # To build the Saskatoon Star-Phoenix instead, use these three settings:
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'
    # To build the Windsor Star instead, use these three settings:
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # To build the Ottawa Citizen instead, use these three settings:
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # To build the Montreal Gazette instead, use these three settings:
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'}),
    ]
    remove_tags = [
        {'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),
        dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),
        dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),
        dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),
        dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),
        dict(name='li', attrs={'class':'share'}),
        dict(name='ul', attrs={'class':'bullet'}),
    ]
    def preprocess_html(self, soup):
        """Delete empty ``id`` attributes from every <div> and return the soup."""
        # Empty ids were observed to break the generated TOC (cause unknown).
        for div in soup.findAll('div', attrs={'id':''}):
            del div['id']
        return soup
    def parse_index(self):
        """Return a list of (section title, list of article dicts) from today's paper."""
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
        articles = {}
        # Articles seen before the first section heading are filed under 'News'.
        key = 'News'
        sections = [key]
        # Section headings (class "section_title02") and article blocks
        # (class "featurecontent") are visited in document order.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains a section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated title must not be listed twice, otherwise its
                # articles would be emitted twice in the result.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data; the headline link is mandatory
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag, False)
            # The index page supplies no publication date.
            pubdate = ''
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(
                dict(title=title, url=url, date=pubdate,
                     description=description, author=author, content=''))
        # Keep page order and drop sections that ended up without articles.
        return [(name, articles[name]) for name in sections if name in articles]
--- a/resources/recipes/saskatoon_star_phoenix.recipe
+++ b/resources/recipes/saskatoon_star_phoenix.recipe
@ -0,0 +1,111 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    """Recipe for a CanWest newspaper site (active paper: Saskatoon Star-Phoenix).

    To fetch a different paper, comment out the active title/url_prefix/
    description lines and un-comment one of the alternative sets below.
    """
    # un-comment the following three lines for the Saskatoon Star-Phoenix
    title = u'Saskatoon Star-Phoenix'
    url_prefix = 'http://www.thestarphoenix.com'
    description = u'News from Saskatoon, SK'
    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    # Only the story header and story body are kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    # Page furniture (comments, navigation, sharing tools, ...) stripped from the kept tags.
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
    def preprocess_html(self,soup):
        """Remove empty ``id`` attributes from all <div> tags and return the soup.

        Empty ids were found to corrupt the generated table of contents.
        """
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup
    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        The section-title and article <div>s are visited in document order;
        each article is filed under the most recently seen section title, or
        under 'News' when no title has been seen yet.

        Returns a list of (section name, list of article dicts) tuples in
        order of first appearance, leaving out sections without articles.
        """
        base_url = self.url_prefix + '/news/todays-paper/'
        soup = self.index_to_soup(base_url + 'index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Match both the section headings ("section_title02") and the
        # article teasers ("featurecontent") in a single pass.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: the articles that follow belong to it.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated heading must be listed only once, otherwise the
                # section (and all its articles) would be returned twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # Article teaser: requires an <h1> that wraps a link to the story.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = base_url + atag['href']
            title = self.tag_to_string(atag, False)
            # The first <p> supplies the description and the first <h4> the
            # author; both are optional.
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(dict(title=title, url=url, date='', description=description, author=author, content=''))
        # ``in`` replaces dict.has_key(), which does not exist in Python 3.
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/vancouver_provice.recipe
+++ b/resources/recipes/vancouver_provice.recipe
@ -0,0 +1,136 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    """Recipe for a CanWest newspaper site (active paper: Vancouver Province).

    To fetch a different paper, comment out the active title/url_prefix/
    description lines and un-comment one of the alternative sets below.
    """
    # un-comment the following three lines for the Vancouver Province
    title = u'Vancouver Province'
    url_prefix = 'http://www.theprovince.com'
    description = u'News from Vancouver, BC'
    # un-comment the following three lines for the Vancouver Sun
    #title = u'Vancouver Sun'
    #url_prefix = 'http://www.vancouversun.com'
    #description = u'News from Vancouver, BC'
    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'
    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'
    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'
    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'
    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    # Only the story header and story body are kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    # Page furniture (comments, navigation, sharing tools, ...) stripped from the kept tags.
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
    def preprocess_html(self,soup):
        """Remove empty ``id`` attributes from all <div> tags and return the soup.

        Empty ids were found to corrupt the generated table of contents.
        """
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup
    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        The section-title and article <div>s are visited in document order;
        each article is filed under the most recently seen section title, or
        under 'News' when no title has been seen yet.

        Returns a list of (section name, list of article dicts) tuples in
        order of first appearance, leaving out sections without articles.
        """
        base_url = self.url_prefix + '/news/todays-paper/'
        soup = self.index_to_soup(base_url + 'index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Match both the section headings ("section_title02") and the
        # article teasers ("featurecontent") in a single pass.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: the articles that follow belong to it.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated heading must be listed only once, otherwise the
                # section (and all its articles) would be returned twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # Article teaser: requires an <h1> that wraps a link to the story.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = base_url + atag['href']
            title = self.tag_to_string(atag, False)
            # The first <p> supplies the description and the first <h4> the
            # author; both are optional.
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(dict(title=title, url=url, date='', description=description, author=author, content=''))
        # ``in`` replaces dict.has_key(), which does not exist in Python 3.
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/vancouver_sun.recipe
+++ b/resources/recipes/vancouver_sun.recipe
@ -0,0 +1,131 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    """Recipe for a CanWest newspaper site (active paper: Vancouver Sun).

    To fetch a different paper, comment out the active title/url_prefix/
    description lines and un-comment one of the alternative sets below.
    """
    # un-comment the following three lines for the Vancouver Sun
    title = u'Vancouver Sun'
    url_prefix = 'http://www.vancouversun.com'
    description = u'News from Vancouver, BC'
    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'
    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'
    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'
    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'
    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    # Only the story header and story body are kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    # Page furniture (comments, navigation, sharing tools, ...) stripped from the kept tags.
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
    def preprocess_html(self,soup):
        """Remove empty ``id`` attributes from all <div> tags and return the soup.

        Empty ids were found to corrupt the generated table of contents.
        """
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup
    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        The section-title and article <div>s are visited in document order;
        each article is filed under the most recently seen section title, or
        under 'News' when no title has been seen yet.

        Returns a list of (section name, list of article dicts) tuples in
        order of first appearance, leaving out sections without articles.
        """
        base_url = self.url_prefix + '/news/todays-paper/'
        soup = self.index_to_soup(base_url + 'index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Match both the section headings ("section_title02") and the
        # article teasers ("featurecontent") in a single pass.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: the articles that follow belong to it.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated heading must be listed only once, otherwise the
                # section (and all its articles) would be returned twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # Article teaser: requires an <h1> that wraps a link to the story.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = base_url + atag['href']
            title = self.tag_to_string(atag, False)
            # The first <p> supplies the description and the first <h4> the
            # author; both are optional.
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(dict(title=title, url=url, date='', description=description, author=author, content=''))
        # ``in`` replaces dict.has_key(), which does not exist in Python 3.
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/vic_times.recipe
+++ b/resources/recipes/vic_times.recipe
@ -0,0 +1,141 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    """Recipe for a CanWest newspaper site (active paper: Victoria Times Colonist).

    To fetch a different paper, comment out the active title/url_prefix/
    description lines and un-comment one of the alternative sets below.
    """
    # un-comment the following three lines for the Victoria Times Colonist
    title = u'Victoria Times Colonist'
    url_prefix = 'http://www.timescolonist.com'
    description = u'News from Victoria, BC'
    # un-comment the following three lines for the Vancouver Province
    #title = u'Vancouver Province'
    #url_prefix = 'http://www.theprovince.com'
    #description = u'News from Vancouver, BC'
    # un-comment the following three lines for the Vancouver Sun
    #title = u'Vancouver Sun'
    #url_prefix = 'http://www.vancouversun.com'
    #description = u'News from Vancouver, BC'
    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'
    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'
    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'
    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'
    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    # Only the story header and story body are kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    # Page furniture (comments, navigation, sharing tools, ...) stripped from the kept tags.
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
    def preprocess_html(self,soup):
        """Remove empty ``id`` attributes from all <div> tags and return the soup.

        Empty ids were found to corrupt the generated table of contents.
        """
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup
    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        The section-title and article <div>s are visited in document order;
        each article is filed under the most recently seen section title, or
        under 'News' when no title has been seen yet.

        Returns a list of (section name, list of article dicts) tuples in
        order of first appearance, leaving out sections without articles.
        """
        base_url = self.url_prefix + '/news/todays-paper/'
        soup = self.index_to_soup(base_url + 'index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Match both the section headings ("section_title02") and the
        # article teasers ("featurecontent") in a single pass.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: the articles that follow belong to it.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated heading must be listed only once, otherwise the
                # section (and all its articles) would be returned twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # Article teaser: requires an <h1> that wraps a link to the story.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = base_url + atag['href']
            title = self.tag_to_string(atag, False)
            # The first <p> supplies the description and the first <h4> the
            # author; both are optional.
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(dict(title=title, url=url, date='', description=description, author=author, content=''))
        # ``in`` replaces dict.has_key(), which does not exist in Python 3.
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/windows_star.recipe
+++ b/resources/recipes/windows_star.recipe
@ -0,0 +1,106 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class CanWestPaper(BasicNewsRecipe):
    """Recipe for a CanWest newspaper site (active paper: Windsor Star).

    To fetch a different paper, comment out the active title/url_prefix/
    description lines and un-comment one of the alternative sets below.
    """
    # un-comment the following three lines for the Windsor Star
    title = u'Windsor Star'
    url_prefix = 'http://www.windsorstar.com'
    description = u'News from Windsor, ON'
    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'
    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    # Only the story header and story body are kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    # Page furniture (comments, navigation, sharing tools, ...) stripped from the kept tags.
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
    def preprocess_html(self,soup):
        """Remove empty ``id`` attributes from all <div> tags and return the soup.

        Empty ids were found to corrupt the generated table of contents.
        """
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup
    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        The section-title and article <div>s are visited in document order;
        each article is filed under the most recently seen section title, or
        under 'News' when no title has been seen yet.

        Returns a list of (section name, list of article dicts) tuples in
        order of first appearance, leaving out sections without articles.
        """
        base_url = self.url_prefix + '/news/todays-paper/'
        soup = self.index_to_soup(base_url + 'index.html')
        articles = {}
        key = 'News'
        ans = ['News']
        # Match both the section headings ("section_title02") and the
        # article teasers ("featurecontent") in a single pass.
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # Section heading: the articles that follow belong to it.
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A repeated heading must be listed only once, otherwise the
                # section (and all its articles) would be returned twice.
                if key not in ans:
                    ans.append(key)
                self.log("Section name %s" % key)
                continue
            # Article teaser: requires an <h1> that wraps a link to the story.
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = base_url + atag['href']
            title = self.tag_to_string(atag, False)
            # The first <p> supplies the description and the first <h4> the
            # author; both are optional.
            ptag = divtag.find('p')
            description = self.tag_to_string(ptag, False) if ptag else ''
            autag = divtag.find('h4')
            author = self.tag_to_string(autag, False) if autag else ''
            articles.setdefault(key, []).append(dict(title=title, url=url, date='', description=description, author=author, content=''))
        # ``in`` replaces dict.has_key(), which does not exist in Python 3.
        return [(section, articles[section]) for section in ans if section in articles]
--- a/resources/recipes/wsj.recipe
+++ b/resources/recipes/wsj.recipe
@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre import strftime
 # http://online.wsj.com/page/us_in_todays_paper.html
@ -67,6 +68,13 @@ class WallStreetJournal(BasicNewsRecipe):
        def parse_index(self):
            soup = self.wsj_get_index()
            year = strftime('%Y')
            for x in soup.findAll('td', attrs={'class':'b14'}):
                txt = self.tag_to_string(x).strip()
                if year in txt:
                    self.timefmt = ' [%s]'%txt
                    break
            left_column = soup.find(
                    text=lambda t: 'begin ITP Left Column' in str(t))
@ -91,7 +99,7 @@ class WallStreetJournal(BasicNewsRecipe):
                    url = url.partition('#')[0]
                    desc = ''
                    d = x.findNextSibling(True)
-                    if d.get('class', None) == 'arialResize':
+                    if d is not None and d.get('class', None) == 'arialResize':
                        desc = self.tag_to_string(d)
                        desc = desc.partition(u'\u2022')[0]
                    self.log('\t\tFound article:', title)
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@ -3,47 +3,139 @@
 __license__   = 'GPL v3'
 '''
-online.wsj.com.com
+online.wsj.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 from datetime import timedelta, date
 class WSJ(BasicNewsRecipe):
    # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
    title          = u'Wall Street Journal (free)'
    __author__     = 'Nick Redding'
    language = 'en'
-    description = ('All the free content from the Wall Street Journal (business'
+    description = ('All the free content from the Wall Street Journal (business, financial and political news)')
-            ', financial and political news)')
+
    no_stylesheets = True
    timefmt = ' [%b %d]'
-    extra_css   = '''h1{font-size:large; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
+
-                    h2{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
+    # customization notes: delete sections you are not interested in
-                    .subhead{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
+    # set omit_paid_content to False if you want the paid content article snippets
-                    .insettipUnit {font-family:Arial,Sans-serif;font-size:xx-small;}
+    # set oldest_article to the maximum number of days back from today to include articles
-                    .targetCaption{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
+    sectionlist = [
-                    .article{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
+                        ['/home-page','Front Page'],
-                    .tagline { ont-size:xx-small;}
+                        ['/public/page/news-opinion-commentary.html','Commentary'],
-                    .dateStamp {font-family:Arial,Helvetica,sans-serif;}
+                        ['/public/page/news-global-world.html','World News'],
-                    h3{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+                        ['/public/page/news-world-business.html','US News'],
-                    .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small; list-style-type: none;}
+                        ['/public/page/news-business-us.html','Business'],
                        ['/public/page/news-financial-markets-stock.html','Markets'],
                        ['/public/page/news-tech-technology.html','Technology'],
                        ['/public/page/news-personal-finance.html','Personal Finnce'],
                        ['/public/page/news-lifestyle-arts-entertainment.html','Life & Style'],
                        ['/public/page/news-real-estate-homes.html','Real Estate'],
                        ['/public/page/news-career-jobs.html','Careers'],
                        ['/public/page/news-small-business-marketing.html','Small Business']
                    ]
    oldest_article = 2
    omit_paid_content = True
    extra_css   = '''h1{font-size:large; font-family:Times,serif;}
                    h2{font-family:Times,serif; font-size:small; font-style:italic;}
                    .subhead{font-family:Times,serif; font-size:small; font-style:italic;}
                    .insettipUnit {font-family:Times,serif;font-size:xx-small;}
                    .targetCaption{font-size:x-small; font-family:Times,serif; font-style:italic; margin-top: 0.25em;}
                    .article{font-family:Times,serif; font-size:x-small;}
                    .tagline { font-size:xx-small;}
                    .dateStamp {font-family:Times,serif;}
                    h3{font-family:Times,serif; font-size:xx-small;}
                    .byline {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
                    .metadataType-articleCredits {list-style-type: none;}
-                    h6{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic;}
+                    h6{font-family:Times,serif; font-size:small; font-style:italic;}
                    .paperLocation{font-size:xx-small;}'''
-    remove_tags_before = dict(name='h1')
+
-    remove_tags =   [   dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
+    remove_tags_before = dict({'class':re.compile('^articleHeadlineBox')})
-                                 "articleTabs_tab_interactive","articleTabs_tab_video",
+    remove_tags =   [   dict({'id':re.compile('^articleTabs_tab_')}),
-                                 "articleTabs_tab_map","articleTabs_tab_slideshow"]),
+                        #dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
-			{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
+                        #         "articleTabs_tab_interactive","articleTabs_tab_video",
-			'insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', 'tooltip',
+                        #         "articleTabs_tab_map","articleTabs_tab_slideshow"]),
-			'adSummary', 'nav-inline','insetFullBracket']},
+			{'class':  ['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
-                        dict(rel='shortcut icon'),
+                                    'insettip','insetClose','more_in', "insetContent",
                        #            'articleTools_bottom','articleTools_bottom mjArticleTools',
                                    'aTools', 'tooltip',
                                    'adSummary', 'nav-inline','insetFullBracket']},
                        dict({'class':re.compile('^articleTools_bottom')}),
                        dict(rel='shortcut icon')
                    ]
    remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
    def get_browser(self):
        # No site-specific browser setup is done here: hand back whatever
        # BasicNewsRecipe.get_browser() produces, unchanged.
        return BasicNewsRecipe.get_browser()
    def preprocess_html(self,soup):
        def decode_us_date(datestr):
            # Convert "<month name> <day> <year>" (any letter case, any
            # surrounding whitespace) into a datetime.date. Tokens after the
            # third one are ignored.
            month_names = ['january','february','march','april','may','june','july','august','september','october','november','december']
            tokens = datestr.lower().split()
            month = month_names.index(tokens[0]) + 1
            day, year = int(tokens[1]), int(tokens[2])
            return date(year, month, day)
        # check if article is paid content
        if self.omit_paid_content:
            divtags = soup.findAll('div','tooltip')
            if divtags:
                for divtag in divtags:
                    if divtag.find(text="Subscriber Content"):
                        return None
        # check if article is too old
        datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
        if datetag:
            dateline_string = self.tag_to_string(datetag,False)
            date_items = dateline_string.split(',')
            datestring = date_items[0]+date_items[1]
            article_date = decode_us_date(datestring)
            earliest_date = date.today() - timedelta(days=self.oldest_article)
            if article_date < earliest_date:
                self.log("Skipping article dated %s" % datestring)
                return None
            datetag.parent.extract()
            # place dateline in article heading
            bylinetag = soup.find('h3','byline')
            if bylinetag:
                h3bylinetag = bylinetag
            else:
                bylinetag = soup.find('li','byline')
                if bylinetag:
                    h3bylinetag = bylinetag.h3
                    if not h3bylinetag:
                        h3bylinetag = bylinetag
                    bylinetag = bylinetag.parent
            if bylinetag:
                if h3bylinetag.a:
                    bylinetext = 'By '+self.tag_to_string(h3bylinetag.a,False)
                else:
                    bylinetext = self.tag_to_string(h3bylinetag,False)
                h3byline = Tag(soup,'h3',[('class','byline')])
                if bylinetext.isspace() or (bylinetext == ''):
                    h3byline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
                else:
                    h3byline.insert(0,NavigableString(bylinetext+u'\u2014'+date_items[0]+','+date_items[1]))
                bylinetag.replaceWith(h3byline)
            else:
                headlinetag = soup.find('div',attrs={'class' : re.compile("^articleHeadlineBox")})
                if headlinetag:
                    dateline = Tag(soup,'h3', [('class','byline')])
                    dateline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
                    headlinetag.insert(len(headlinetag),dateline)
        else: # if no date tag, don't process this page--it's not a news item
            return None
        # This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
        ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
        if ultag:
@ -58,7 +150,7 @@ class WSJ(BasicNewsRecipe):
        key = None
        ans = []
-        def parse_index_page(page_name,page_title,omit_paid_content):
+        def parse_index_page(page_name,page_title):
            def article_title(tag):
                atag = tag.find('h2') # title is usually in an h2 tag
@ -119,7 +211,6 @@ class WSJ(BasicNewsRecipe):
            soup = self.index_to_soup(pageurl)
            # Find each instance of div with class including "headlineSummary"
            for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
                # divtag contains all article data as ul's and li's
                # first, check if there is an h3 tag which provides a section name
                stag = divtag.find('h3')
@ -162,7 +253,7 @@ class WSJ(BasicNewsRecipe):
                        # now skip paid subscriber articles if desired
                        subscriber_tag = litag.find(text="Subscriber Content")
                        if subscriber_tag:
-                                if omit_paid_content:
+                                if self.omit_paid_content:
                                    continue
                                # delete the tip div so it doesn't get in the way
                                tiptag = litag.find("div", { "class" : "tipTargetBox" })
@ -185,7 +276,7 @@ class WSJ(BasicNewsRecipe):
                            continue
                        if url.startswith("/article"):
                            url = mainurl+url
-                        if not url.startswith("http"):
+                        if not url.startswith("http://online.wsj.com"):
                            continue
                        if not url.endswith(".html"):
                            continue
@ -214,48 +305,10 @@ class WSJ(BasicNewsRecipe):
                            articles[page_title] = []
                        articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
        # customization notes: delete sections you are not interested in
        # set omit_paid_content to False if you want the paid content article previews
        sectionlist = ['Front Page','Commentary','World News','US News','Business','Markets',
                       'Technology','Personal Finance','Life & Style','Real Estate','Careers','Small Business']
        omit_paid_content = True
-        if 'Front Page' in sectionlist:
+        for page_name,page_title in self.sectionlist:
-            parse_index_page('/home-page','Front Page',omit_paid_content)
+            parse_index_page(page_name,page_title)
-            ans.append('Front Page')
+            ans.append(page_title)
        if 'Commentary' in sectionlist:
            parse_index_page('/public/page/news-opinion-commentary.html','Commentary',omit_paid_content)
            ans.append('Commentary')
        if 'World News' in sectionlist:
            parse_index_page('/public/page/news-global-world.html','World News',omit_paid_content)
            ans.append('World News')
        if 'US News' in sectionlist:
            parse_index_page('/public/page/news-world-business.html','US News',omit_paid_content)
            ans.append('US News')
        if 'Business' in sectionlist:
            parse_index_page('/public/page/news-business-us.html','Business',omit_paid_content)
            ans.append('Business')
        if 'Markets' in sectionlist:
            parse_index_page('/public/page/news-financial-markets-stock.html','Markets',omit_paid_content)
            ans.append('Markets')
        if 'Technology' in sectionlist:
            parse_index_page('/public/page/news-tech-technology.html','Technology',omit_paid_content)
            ans.append('Technology')
        if 'Personal Finance' in sectionlist:
            parse_index_page('/public/page/news-personal-finance.html','Personal Finance',omit_paid_content)
            ans.append('Personal Finance')
        if 'Life & Style' in sectionlist:
            parse_index_page('/public/page/news-lifestyle-arts-entertainment.html','Life & Style',omit_paid_content)
            ans.append('Life & Style')
        if 'Real Estate' in sectionlist:
            parse_index_page('/public/page/news-real-estate-homes.html','Real Estate',omit_paid_content)
            ans.append('Real Estate')
        if 'Careers' in sectionlist:
            parse_index_page('/public/page/news-career-jobs.html','Careers',omit_paid_content)
            ans.append('Careers')
        if 'Small Business' in sectionlist:
            parse_index_page('/public/page/news-small-business-marketing.html','Small Business',omit_paid_content)
            ans.append('Small Business')
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
--- a/resources/recipes/yementimes.recipe
+++ b/resources/recipes/yementimes.recipe
@ -0,0 +1,125 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 class YemenTimesRecipe(BasicNewsRecipe):
    '''
    Recipe for the Yemen Times (yementimes.com).

    The URLs listed in ``feeds`` are fetched and parsed as HTML section
    pages by parse_index(); each downloaded article is then rebuilt as a
    minimal document by preprocess_html().
    '''
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_YE'
    country = 'YE'
    version = 1
    title = u'Yemen Times'
    publisher = u'yementimes.com'
    category = u'News, Opinion, Yemen'
    description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
                                                      'class': 'DMAIN2'}))
    remove_attributes = ['style']
    # Base URL that the relative article links found by parse_index() are appended to
    INDEX = 'http://www.yementimes.com/'
    # (section title, section page URL) pairs; consumed by parse_index()
    feeds = []
    feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
    feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
    feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
    feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
    feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
    feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
    feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
    feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
    feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
    feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
    feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
    feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
    feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))
    extra_css = '''
                body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
                div.yemen_byline {font-size: medium; font-weight: bold;}
                div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
                .yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
                '''
    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher, 'linearize_tables': True}

    def get_browser(self):
        '''
        Return the default BasicNewsRecipe browser with gzip handling enabled.
        '''
        br = BasicNewsRecipe.get_browser()
        br.set_handle_gzip(True)
        return br

    def parse_index(self):
        '''
        Scrape every section page listed in ``feeds``.

        Returns a list of (section title, articles) tuples, where each
        article is a dict with the keys 'title', 'date', 'url' and
        'description'. A section page that lacks the expected
        div.newsbox / following table structure is logged and skipped
        instead of aborting the whole download.
        '''
        answer = []
        for feed_title, feed in self.feeds:
            soup = self.index_to_soup(feed)
            newsbox = soup.find('div', 'newsbox')
            main = None
            if newsbox is not None:
                main = newsbox.findNextSibling('table')
            if main is None:
                self.log('Could not find the article list for section: %s' % feed_title)
                continue
            articles = []
            for li in main.findAll('li'):
                link = li.a
                # List items without a usable link cannot become articles
                if link is None or not link.get('href', None):
                    continue
                title = self.tag_to_string(link)
                url = self.INDEX + link['href']
                articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/>&nbsp;'})
            answer.append((feed_title, articles))
        return answer

    def preprocess_html(self, soup):
        '''
        Rebuild the article as a new, minimal document.

        The following pieces are moved from ``soup`` into a fresh soup, in
        this order and only when present: the headline (div#DVMTIT, renamed
        to h1), the first image of the div#DVTOP that follows it, the
        publication date and byline taken from div#DVTIT, and the story
        body (div#DVDET). Returns the fresh soup.
        '''
        freshSoup = self.getFreshSoup(soup)
        headline = soup.find('div', attrs = {'id': 'DVMTIT'})
        if headline:
            div = headline.findNext('div', attrs = {'id': 'DVTOP'})
            img = None
            if div:
                img = div.find('img')
            headline.name = 'h1'
            freshSoup.body.append(headline)
            if img is not None:
                freshSoup.body.append(img)
        byline = soup.find('div', attrs = {'id': 'DVTIT'})
        if byline:
            # Must be bound even when the byline has no <span>; otherwise the
            # "date is not None" check below raises UnboundLocalError.
            date = None
            date_el = byline.find('span')
            if date_el:
                pub_date = self.tag_to_string(date_el)
                date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
                date.append(pub_date)
                # Remove the date so it is not repeated in the byline text
                date_el.extract()
            raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
            author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')
            if date is not None:
                freshSoup.body.append(date)
            freshSoup.body.append(author)
        story = soup.find('div', attrs = {'id': 'DVDET'})
        if story:
            # Tables that wrap an image get the caption style from extra_css
            for table in story.findAll('table'):
                if table.find('img'):
                    table['class'] = 'yemen_caption'
            freshSoup.body.append(story)
        return freshSoup

    def getFreshSoup(self, oldSoup):
        '''
        Return an empty html/head/body soup, copying the text of
        ``oldSoup``'s <title> into it when ``oldSoup`` has a head with a
        title.
        '''
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        if oldSoup.head is not None and oldSoup.head.title:
            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
        return freshSoup
--- a/resources/viewer/images.js
+++ b/resources/viewer/images.js
@ -0,0 +1,23 @@
 /*
 * images management
 * Copyright 2008 Kovid Goyal
 * License: GNU GPL v3
 */
 function scale_images() {
    // Cap every visible image so that it cannot extend past the right or
    // bottom edge of the window (a 5px margin is kept on both axes).
    $("img:visible").each(function() {
        var img = $(this);
        var left = img.offset().left;
        img.css("max-width", (window.innerWidth-left-5)+"px");
        img.css("max-height", (window.innerHeight-5)+"px");
    });
 }
 function setup_image_scaling_handlers() {
    // Scale once immediately, then again every time the window is resized.
    var rescale = function() {
        scale_images();
    };
    rescale();
    $(window).bind("resize", rescale);
 }
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -2,10 +2,11 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import sys
+import atexit, os, shutil, sys, tempfile, zipfile
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.constants import numeric_version
 from calibre.ptempfile import PersistentTemporaryFile
 class Plugin(object):
    '''
@ -225,12 +226,14 @@ class MetadataWriterPlugin(Plugin):
        '''
        pass
-
+    
 class CatalogPlugin(Plugin):
    '''
    A plugin that implements a catalog generator.
    '''
    resources_path = None
    #: Output file type for which this plugin should be run
    #: For example: 'epub' or 'xml'
    file_types = set([])
@ -248,15 +251,19 @@ class CatalogPlugin(Plugin):
    #:                       '%default' + "'"))]
    cli_options = []
    def search_sort_db(self, db, opts):
-        if opts.search_text:
+
        # If declared, --ids overrides any declared search criteria
        if not opts.ids and opts.search_text:
            db.search(opts.search_text)
        if opts.sort_by:
            # 2nd arg = ascending
            db.sort(opts.sort_by, True)
-
+        
-        return db.get_data_as_dict()
+        return db.get_data_as_dict(ids=opts.ids)
    def get_output_fields(self, opts):
        # Return a list of requested fields, with opts.sort_by first
@ -272,11 +279,40 @@ class CatalogPlugin(Plugin):
            fields = list(all_fields & requested_fields)
        else:
            fields = list(all_fields)
        fields.sort()
-        fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
+        if opts.sort_by:
            fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
        return fields
-    def run(self, path_to_output, opts, db):
+    def initialize(self):
        '''
        If plugin is not a built-in, copy the plugin's .ui and .py files from
        the zip file to $TMPDIR.
        Tab will be dynamically generated and added to the Catalog Options dialog in 
        calibre.gui2.dialogs.catalog.py:Catalog
        '''
        from calibre.customize.builtins import plugins as builtin_plugins
        from calibre.customize.ui import config
        from calibre.ptempfile import PersistentTemporaryDirectory
        if not type(self) in builtin_plugins and \
           not self.name in config['disabled_plugins']:
            files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
            resources = zipfile.ZipFile(self.plugin_path,'r')
            if self.resources_path is None:
                self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')
            for file in files_to_copy:
                try:
                    resources.extract(file, self.resources_path)
                except:
                    print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path))
                    continue
            resources.close()                
    def run(self, path_to_output, opts, db, ids):
        '''
        Run the plugin. Must be implemented in subclasses.
        It should generate the catalog in the format specified
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -18,7 +18,7 @@ class BLACKBERRY(USBMS):
    VENDOR_ID   = [0x0fca]
    PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107]
+    BCD         = [0x0200, 0x0107, 0x0201]
    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@ -86,4 +86,5 @@ class NOOK(USBMS):
        return drives
-
+    def sanitize_path_components(self, components):
        return [x.replace('#', '_') for x in components]
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -782,6 +782,13 @@ class Device(DeviceConfig, DevicePlugin):
        '''
        return default
    def sanitize_path_components(self, components):
        '''
        Hook for device specific clean-up of the path components of a file
        that is about to be uploaded to the device.

        This default implementation returns ``components`` unchanged.
        '''
        return components
    def create_upload_path(self, path, mdata, fname):
        path = os.path.abspath(path)
        extra_components = []
@ -834,6 +841,7 @@ class Device(DeviceConfig, DevicePlugin):
        extra_components = list(map(remove_trailing_periods, extra_components))
        components = shorten_components_to(250 - len(path), extra_components)
        components = self.sanitize_path_components(components)
        filepath = os.path.join(path, *components)
        filedir = os.path.dirname(filepath)
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -132,7 +132,8 @@ class FB2MLizer(object):
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
                        self.opts, self.opts.output_profile)
                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
        return output
@ -152,7 +153,7 @@ class FB2MLizer(object):
        text = []
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            text.append(self.add_page_anchor(item))
            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
        return ''.join(text)
--- a/src/calibre/ebooks/lit/output.py
+++ b/src/calibre/ebooks/lit/output.py
@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
        mangler(oeb, opts)
        rasterizer = SVGRasterizer()
        rasterizer(oeb, opts)
-        lit = LitWriter()
+        lit = LitWriter(self.opts)
        lit(oeb, output_path)
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -134,7 +134,7 @@ def warn(x):
 class ReBinary(object):
    NSRMAP = {'': None, XML_NS: 'xml'}
-    def __init__(self, root, item, oeb, map=HTML_MAP):
+    def __init__(self, root, item, oeb, opts, map=HTML_MAP):
        self.item = item
        self.logger = oeb.logger
        self.manifest = oeb.manifest
@ -143,7 +143,7 @@ class ReBinary(object):
        self.anchors = []
        self.page_breaks = []
        self.is_html  = is_html = map is HTML_MAP
-        self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
+        self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
@ -295,9 +295,8 @@ def preserve(function):
    return wrapper
 class LitWriter(object):
-    def __init__(self):
+    def __init__(self, opts):
-        # Wow, no options
+        self.opts = opts
        pass
    def _litize_oeb(self):
        oeb = self._oeb
@ -469,7 +468,7 @@ class LitWriter(object):
            secnum = 0
            if isinstance(data, etree._Element):
                self._add_folder(name)
-                rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
+                rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
                self._add_file(name + '/ahc', rebin.ahc, 0)
                self._add_file(name + '/aht', rebin.aht, 0)
                item.page_breaks = rebin.page_breaks
@ -562,7 +561,7 @@ class LitWriter(object):
        meta.attrib['ms--minimum_level'] = '0'
        meta.attrib['ms--attr5'] = '1'
        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
-        rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
+        rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
        meta = rebin.content
        self._meta = meta
        self._add_file('/meta', meta)
--- a/src/calibre/ebooks/metadata/cli.py
+++ b/src/calibre/ebooks/metadata/cli.py
@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -134,7 +134,10 @@ def metadata_from_filename(name, pat=None):
            mi.authors = aus
            if prefs['swap_author_names'] and mi.authors:
                def swap(a):
-                    parts = a.split()
+                    if ',' in a:
                        parts = a.split(',', 1)
                    else:
                        parts = a.split(None, 1)
                    if len(parts) > 1:
                        t = parts[-1]
                        parts = parts[:-1]
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -92,6 +92,7 @@ class MobiMLizer(object):
    def __call__(self, oeb, context):
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
        self.oeb = oeb
        self.opts = context
        self.profile = profile = context.dest
        self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
        self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
@ -114,7 +115,7 @@ class MobiMLizer(object):
    def mobimlize_spine(self):
        'Iterate over the spine and convert it to MOBIML'
        for item in self.oeb.spine:
-            stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
            body = item.data.find(XHTML('body'))
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            nbody = etree.SubElement(nroot, XHTML('body'))
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -563,6 +563,16 @@ class MobiReader(object):
                    recindex = attrib.pop(attr, None) or recindex
                if recindex is not None:
                    attrib['src'] = 'images/%s.jpg' % recindex
                for attr in ('width', 'height'):
                    if attr in attrib:
                        val = attrib[attr]
                        if val.lower().endswith('em'):
                            try:
                                nval = float(val[:-2])
                                nval *= 16 * (168.451/72) # Assume this was set using the Kindle profile
                                attrib[attr] = "%dpx"%int(nval)
                            except:
                                del attrib[attr]
            elif tag.tag == 'pre':
                if not tag.text:
                    tag.tag = 'div'
--- a/src/calibre/ebooks/oeb/factory.py
+++ b/src/calibre/ebooks/oeb/factory.py
@ -1,99 +0,0 @@
 '''
 Registry associating file extensions with Reader classes.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import sys, os, logging
 from itertools import chain
 import calibre
 from calibre.ebooks.oeb.base import OEBError
 from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks.oeb.writer import OEBWriter
 from calibre.ebooks.lit.reader import LitReader
 from calibre.ebooks.lit.writer import LitWriter
 from calibre.ebooks.mobi.reader import MobiReader
 from calibre.ebooks.mobi.writer import MobiWriter
 from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.utils.config import Config
 __all__ = ['get_reader']
 REGISTRY = {
    '.opf': (OEBReader, None),
    '.lit': (LitReader, LitWriter),
    '.mobi': (MobiReader, MobiWriter),
    }
 def ReaderFactory(path):
    '''
    Pick the Reader class able to load the e-book at `path`.

    A directory is always read with OEBReader.  For anything else the
    lower-cased file extension is looked up in REGISTRY; an extension with
    no registered Reader raises OEBError.
    '''
    if os.path.isdir(path):
        return OEBReader
    suffix = os.path.splitext(path)[1].lower()
    entry = REGISTRY.get(suffix, (None, None))
    reader_cls = entry[0]
    if reader_cls is not None:
        return reader_cls
    raise OEBError('Unknown e-book file extension %r' % suffix)
 def WriterFactory(path):
    '''
    Pick the Writer class used to produce the e-book at `path`.

    OEBWriter is returned when `path` is an existing directory, or when it
    does not exist and has no file extension.  Otherwise the lower-cased
    extension is looked up in REGISTRY; an extension with no registered
    Writer raises OEBError.
    '''
    if os.path.isdir(path):
        return OEBWriter
    suffix = os.path.splitext(path)[1].lower()
    if not (suffix or os.path.exists(path)):
        # Extension-less path that is not on disk: treated like a directory
        return OEBWriter
    writer_cls = REGISTRY.get(suffix, (None, None))[1]
    if writer_cls is not None:
        return writer_cls
    raise OEBError('Unknown e-book file extension %r' % suffix)
 def option_parser(Reader, Writer):
    '''
    Build the command-line option parser for one Reader/Writer pairing.

    The Reader, then every transform listed by the Reader and the Writer,
    then the Writer register their options on a shared Config.  The generic
    conversion options are added to the parser obtained from that Config.
    '''
    config = Config('ebook-convert', _('Options to control e-book conversion.'))
    Reader.config(config)
    for transform_cls in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        transform_cls.config(config)
    Writer.config(config)
    result = config.option_parser()
    # (flags, keyword arguments) for each generic option, in display order
    generic_options = (
        (('--encoding',), dict(
            default=None,
            help=_('Character encoding for input. Default is to auto detect.'))),
        (('-o', '--output'), dict(
            default=None,
            help=_('Output file. Default is derived from input filename.'))),
        (('-p', '--pretty-print'), dict(
            action='store_true', default=False,
            help=_('Produce more human-readable XML output.'))),
        (('-v', '--verbose'), dict(
            default=0, action='count',
            help=_('Useful for debugging.'))),
    )
    for flags, settings in generic_options:
        result.add_option(*flags, **settings)
    return result
 def main(argv=sys.argv):
    if len(argv) < 3:
        print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]")
        return 1
    inpath, outpath = argv[1], argv[2]
    Reader = ReaderFactory(inpath)
    Writer = WriterFactory(outpath)
    parser = option_parser(Reader, Writer)
    opts, args = parser.parse_args(argv[3:])
    if len(args) != 0:
        parser.print_help()
        return 1
    logger = logging.getLogger('ebook-convert')
    calibre.setup_cli_handlers(logger, logging.DEBUG)
    encoding = opts.encoding
    pretty_print = opts.pretty_print
    oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
    reader = Reader.generate(opts)
    writer = Writer.generate(opts)
    transforms = []
    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        transforms.append(Transform.generate(opts))
    reader(oeb, inpath)
    for transform in transforms:
        transform(oeb, context)
    writer(oeb, outpath)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
 class Stylizer(object):
    STYLESHEETS = WeakKeyDictionary()
-    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
+    def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
            extra_css='', user_css=''):
-        self.oeb = oeb
+        self.oeb, self.opts = oeb, opts
        self.profile = profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
@ -249,6 +249,8 @@ class Stylizer(object):
                style.update(self._normalize_font(prop.cssValue))
            elif name == 'list-style':
                style.update(self._normalize_list_style(prop.cssValue))
            elif name == 'text-align':
                style.update(self._normalize_text_align(prop.cssValue))
            else:
                style[name] = prop.value
        if 'font-size' in style:
@ -306,6 +308,19 @@ class Stylizer(object):
        return style
    def _normalize_text_align(self, cssvalue):
        '''
        Return a one-entry dict holding the normalized 'text-align' value.

        'left' and 'justify' are both replaced by whichever of the two the
        dont_justify option selects; every other value, 'inherit' included,
        is passed through unchanged.
        '''
        alignment = cssvalue.cssText
        if alignment in ('left', 'justify'):
            alignment = 'left' if self.opts.dont_justify else 'justify'
        return {'text-align': alignment}
    def _normalize_font(self, cssvalue):
        composition = ('font-style', 'font-variant', 'font-weight',
                       'font-size', 'line-height', 'font-family')
@ -411,6 +426,7 @@ class Style(object):
        return result
    def _unit_convert(self, value, base=None, font=None):
        ' Return value in pts'
        if isinstance(value, (int, long, float)):
            return value
        try:
@ -447,6 +463,9 @@ class Style(object):
                result = value * 0.40
        return result
    def pt_to_px(self, value):
        ' Convert value from pts to pixels using the dpi of the profile'
        pixels_per_pt = self._profile.dpi / 72.0
        return pixels_per_pt * value
    @property
    def fontSize(self):
        def normalize_fontsize(value, base):
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -141,7 +141,7 @@ class CSSFlattener(object):
            bs.append('text-align: '+ \
                    ('left' if self.context.dont_justify else 'justify'))
            body.set('style', '; '.join(bs))
-            stylizer = Stylizer(html, item.href, self.oeb, profile,
+            stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
                    user_css=self.context.extra_css,
                    extra_css=css)
            self.stylizers[item] = stylizer
--- a/src/calibre/ebooks/oeb/transforms/manglecase.py
+++ b/src/calibre/ebooks/oeb/transforms/manglecase.py
@ -29,13 +29,14 @@ class CaseMangler(object):
    @classmethod
    def generate(cls, opts):
        ' Create a new instance of this transform; opts is accepted but not used'
        mangler = cls()
        return mangler
-    
+
    def __call__(self, oeb, context):
        '''
        Run the transform on oeb: log the step, remember the book, the
        conversion context (as self.opts) and the context's source profile,
        then mangle the spine.
        '''
        oeb.logger.info('Applying case-transforming CSS...')
        self.oeb, self.opts = oeb, context
        self.profile = context.source
        self.mangle_spine()
-    
+
    def mangle_spine(self):
        id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
        self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
@ -44,9 +45,9 @@ class CaseMangler(object):
            relhref = item.relhref(href)
            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
                             rel='stylesheet', href=relhref, type=CSS_MIME)
-            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
+            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
            self.mangle_elem(html.find(XHTML('body')), stylizer)
-    
+
    def text_transform(self, transform, text):
        if transform == 'capitalize':
            return text.title()
@ -55,7 +56,7 @@ class CaseMangler(object):
        elif transform == 'lowercase':
            return text.lower()
        return text
-    
+
    def split_text(self, text):
        results = ['']
        isupper = text[0].isupper()
@ -66,7 +67,7 @@ class CaseMangler(object):
                isupper = not isupper
                results.append(char)
        return results
-    
+
    def smallcaps_elem(self, elem, attr):
        texts = self.split_text(getattr(elem, attr))
        setattr(elem, attr, None)
@ -90,7 +91,7 @@ class CaseMangler(object):
                    last.tail = tail
                    child.tail = None
                last = child
-    
+
    def mangle_elem(self, elem, stylizer):
        if not isinstance(elem.tag, basestring) or \
           namespace(elem.tag) != XHTML_NS:
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@ -44,6 +44,7 @@ class SVGRasterizer(object):
    def __call__(self, oeb, context):
        oeb.logger.info('Rasterizing SVG images...')
        self.oeb = oeb
        self.opts = context
        self.profile = context.dest
        self.images = {}
        self.dataize_manifest()
@ -102,7 +103,7 @@ class SVGRasterizer(object):
    def rasterize_spine(self):
        for item in self.oeb.spine:
            html = item.data
-            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
+            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
            self.rasterize_item(item, stylizer)
    def rasterize_item(self, item, stylizer):
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@ -20,6 +20,10 @@ class Font(object):
 class Column(object):
    # A column contains an element if the element bulges out to
    # the left or the right by at most HFUZZ*col width.
    HFUZZ = 0.2
    def __init__(self):
        self.left = self.right = self.top = self.bottom = 0
        self.width = self.height = 0
@ -41,6 +45,10 @@ class Column(object):
        for x in self.elements:
            yield x
    def contains(self, elem):
        '''
        True if elem lies strictly between this column's left and right
        edges, each edge first pushed outwards by HFUZZ times the column
        width.
        '''
        slack = self.HFUZZ*self.width
        lower = self.left - slack
        upper = self.right + slack
        return elem.left > lower and elem.right < upper
 class Element(object):
    def __eq__(self, other):
@ -132,6 +140,18 @@ class Interval(object):
    def __hash__(self):
        '''
        Hash computed from the formatted (left, right) end points.

        Both end points have to be handed to the format operator as one
        tuple.  Writing "'(%f,%f)'%self.left, self.right" formats with
        self.left alone and passes self.right to hash() as a second
        argument, which raises TypeError every time the object is hashed.
        '''
        return hash('(%f,%f)' % (self.left, self.right))
 class Region(object):
    '''
    A rectangular area that holds an ordered list of columns.

    All geometry attributes (top, bottom, left, right, width, height) start
    at 0; nothing in this class updates them.
    '''

    def __init__(self):
        self.columns = []
        self.top = self.bottom = self.left = self.right = self.width = self.height = 0

    def add_columns(self, columns):
        '''
        Store `columns`, ordered by their left edge, if the region is still
        empty.  Adding columns to a region that already has some is a no-op.
        '''
        if not self.columns:
            # key= gives the same ordering as a cmp function comparing
            # x.left with y.left, and unlike cmp= it is accepted by every
            # sorted() that supports key functions.
            self.columns.extend(sorted(columns, key=lambda col: col.left))
        else:
            # Merging into an already populated region is not implemented
            pass
 class Page(object):
@ -238,11 +258,10 @@ class Page(object):
        return columns
    def find_elements_in_row_of(self, x):
-        interval = Interval(x.top - self.YFUZZ * self.average_text_height,
+        interval = Interval(x.top,
                x.top + self.YFUZZ*(1+self.average_text_height))
        h_interval = Interval(x.left, x.right)
-        m = max(0, x.idx-15)
+        for y in self.elements[x.idx:x.idx+15]:
        for y in self.elements[m:x.idx+15]:
            if y is not x:
                y_interval = Interval(y.top, y.bottom)
                x_interval = Interval(y.left, y.right)
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -113,7 +113,8 @@ class PMLMLizer(object):
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
                        self.opts, self.opts.output_profile)
                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
        return output
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@ -90,7 +90,8 @@ class RBMLizer(object):
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
                        self.opts, self.opts.output_profile)
                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
        return output
@ -111,7 +112,7 @@ class RBMLizer(object):
        output = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            output.append(self.add_page_anchor(item))
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
        return ''.join(output)
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@ -111,12 +111,13 @@ class RTFMLizer(object):
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
                        self.opts, self.opts.output_profile)
                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
                output += '{\\page } '
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to RTF markup...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        output += self.footer()
        output = self.insert_images(output)
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@ -54,7 +54,7 @@ class TXTMLizer(object):
        output.append(self.get_toc())
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to TXT...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
            content = self.remove_newlines(content)
            output += self.dump_text(etree.fromstring(content), stylizer)
--- a/src/calibre/gui2/convert/gui_conversion.py
+++ b/src/calibre/gui2/convert/gui_conversion.py
@ -4,9 +4,14 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-from calibre.ebooks.conversion.plumber import Plumber
+import os
-from calibre.utils.logging import Log
+from optparse import OptionParser
 from calibre.customize.conversion import OptionRecommendation, DummyReporter
 from calibre.ebooks.conversion.plumber import Plumber
 from calibre.customize.ui import plugin_for_catalog_format
 from calibre.utils.logging import Log
 from calibre.gui2 import choose_dir, Application
 def gui_convert(input, output, recommendations, notification=DummyReporter(),
        abort_after_input_dump=False, log=None):
@ -20,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
    plumber.run()
-def gui_catalog(fmt, title, dbspec, ids, out_file_name,
+def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
        notification=DummyReporter(), log=None):
    if log is None:
        log = Log()
@ -31,8 +36,28 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
        db = LibraryDatabase2(dbpath)
    else: # To be implemented in the future
        pass
-    # Implement the interface to the catalog generating code here
+    
-    db
+    # Create a minimal OptionParser that we can append to
    parser = OptionParser()
    args = []
    parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
    opts, args = parser.parse_args()
    # Populate opts
    opts.ids = ids
    opts.search_text = None
    opts.sort_by = None
    # Extract the option dictionary to comma-separated lists
    for option in fmt_options:
        setattr(opts,option, ','.join(fmt_options[option]))
    # Fetch and run the plugin for fmt
    plugin = plugin_for_catalog_format(fmt)
    plugin.run(out_file_name, opts, db)
--- a/src/calibre/gui2/dialogs/catalog.py
+++ b/src/calibre/gui2/dialogs/catalog.py
@ -6,39 +6,131 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-from PyQt4.Qt import QDialog
+import os, shutil, sys, tempfile
 from PyQt4.Qt import QDialog, QWidget
 from calibre.customize.ui import config
 from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
-from calibre.gui2 import dynamic
+from calibre.gui2 import gprefs, dynamic
-from calibre.customize.ui import available_catalog_formats
+from calibre.customize.ui import available_catalog_formats, catalog_plugins
 from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
 class Catalog(QDialog, Ui_Dialog):
    ''' Catalog Dialog builder'''
    widgets = []
    def __init__(self, parent, dbspec, ids):
        import re, cStringIO
        from calibre import prints as info
        from calibre.gui2 import dynamic
        from PyQt4.uic import compileUi
        QDialog.__init__(self, parent)
        # Run the dialog setup generated from catalog.ui
        self.setupUi(self)
        self.dbspec, self.ids = dbspec, ids
        # Display the number of books we've been passed
        self.count.setText(unicode(self.count.text()).format(len(ids)))
        # Display the last-used title
        self.title.setText(dynamic.get('catalog_last_used_title',
            _('My Books')))
        fmts = sorted([x.upper() for x in available_catalog_formats()])
        # GwR *** Add option tabs for built-in formats
        # This code models #69 in calibre/gui2/dialogs/config/__init__.py
        self.fmts = []
        from calibre.customize.builtins import plugins as builtin_plugins
        from calibre.customize import CatalogPlugin
        for plugin in catalog_plugins():
            if plugin.name in config['disabled_plugins']:
                continue
            name = plugin.name.lower().replace(' ', '_')
            if type(plugin) in builtin_plugins:
                #info("Adding widget for builtin Catalog plugin %s" % plugin.name)                
                try:
                    catalog_widget = __import__('calibre.gui2.catalog.'+name,
                            fromlist=[1])
                    pw = catalog_widget.PluginWidget()
                    pw.initialize(name)
                    pw.ICON = I('forward.svg')    
                    self.widgets.append(pw)
                    [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]                    
                except ImportError:
                    info("ImportError with %s" % name)
                    continue
            else:
                # Load dynamic tab
                form = os.path.join(plugin.resources_path,'%s.ui' % name)
                klass = os.path.join(plugin.resources_path,'%s.py' % name)
                compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)
                if os.path.exists(form) and os.path.exists(klass):
                    #info("Adding widget for user-installed Catalog plugin %s" % plugin.name)
                    # Compile the .ui form provided in plugin.zip
                    if not os.path.exists(compiled_form):
                        # info('\tCompiling form', form)
                        buf = cStringIO.StringIO()
                        compileUi(form, buf)
                        dat = buf.getvalue()
                        dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', 
                                         re.DOTALL).sub(r'_("\1")', dat)
                        open(compiled_form, 'wb').write(dat)
                    # Import the dynamic PluginWidget() from .py file provided in plugin.zip
                    try:
                        sys.path.insert(0, plugin.resources_path)
                        catalog_widget = __import__(name, fromlist=[1])
                        pw = catalog_widget.PluginWidget()
                        pw.initialize(name)
                        pw.ICON = I('forward.svg')    
                        self.widgets.append(pw)                        
                        [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
                    except ImportError:
                        info("ImportError with %s" % name)
                        continue
                    finally:
                        sys.path.remove(plugin.resources_path)
                else:
                    info("No dynamic tab resources found for %s" % name)
        self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
        for pw in self.widgets:
            page = self.tabs.addTab(pw,pw.TITLE)
        # Generate a sorted list of installed catalog formats/sync_enabled pairs
        fmts = sorted([x[0] for x in self.fmts])
        self.sync_enabled_formats = []
        for fmt in self.fmts:
            if fmt[1]:
                self.sync_enabled_formats.append(fmt[0])
        # Callback when format changes
        self.format.currentIndexChanged.connect(self.format_changed)
        # Add the installed catalog format list to the format QComboBox
        self.format.addItems(fmts)
-        pref = dynamic.get('catalog_preferred_format', 'EPUB')
+        pref = dynamic.get('catalog_preferred_format', 'CSV')
        idx = self.format.findText(pref)
        if idx > -1:
            self.format.setCurrentIndex(idx)
        if self.sync.isEnabled():
            self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
-
+                            
    def format_changed(self, idx):
        cf = unicode(self.format.currentText())
-        if cf in ('EPUB', 'MOBI'):
+        if cf in self.sync_enabled_formats:
            self.sync.setEnabled(True)
        else:
            self.sync.setDisabled(True)
--- a/src/calibre/gui2/dialogs/catalog.ui
+++ b/src/calibre/gui2/dialogs/catalog.ui
@ -6,105 +6,121 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>628</width>
+    <width>611</width>
-    <height>503</height>
+    <height>514</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Generate catalog</string>
  </property>
  <property name="windowIcon">
-   <iconset resource="../../../work/calibre/resources/images.qrc">
+   <iconset>
    <normaloff>:/images/library.png</normaloff>:/images/library.png</iconset>
  </property>
-  <layout class="QGridLayout" name="gridLayout">
+  <widget class="QDialogButtonBox" name="buttonBox">
-   <item row="2" column="0">
+   <property name="geometry">
-    <widget class="QDialogButtonBox" name="buttonBox">
+    <rect>
-     <property name="orientation">
+     <x>430</x>
-      <enum>Qt::Horizontal</enum>
+     <y>470</y>
-     </property>
+     <width>164</width>
-     <property name="standardButtons">
+     <height>32</height>
-      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+    </rect>
-     </property>
+   </property>
-    </widget>
+   <property name="orientation">
-   </item>
+    <enum>Qt::Horizontal</enum>
-   <item row="1" column="0">
+   </property>
-    <widget class="QTabWidget" name="tabs">
+   <property name="standardButtons">
-     <property name="currentIndex">
+    <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
-      <number>0</number>
+   </property>
-     </property>
+  </widget>
-     <widget class="QWidget" name="tab">
+  <widget class="QTabWidget" name="tabs">
-      <attribute name="title">
+   <property name="geometry">
-       <string>Catalog options</string>
+    <rect>
-      </attribute>
+     <x>12</x>
-      <layout class="QGridLayout" name="gridLayout_2">
+     <y>39</y>
-       <item row="0" column="0">
+     <width>579</width>
-        <widget class="QLabel" name="label">
+     <height>411</height>
-         <property name="text">
+    </rect>
-          <string>Catalog &amp;format:</string>
+   </property>
-         </property>
+   <property name="currentIndex">
-         <property name="buddy">
+    <number>0</number>
-          <cstring>format</cstring>
+   </property>
-         </property>
+   <widget class="QWidget" name="tab">
-        </widget>
+    <attribute name="title">
-       </item>
+     <string>Catalog options</string>
-       <item row="0" column="2">
+    </attribute>
-        <widget class="QComboBox" name="format"/>
+    <layout class="QGridLayout" name="gridLayout_2">
-       </item>
+     <item row="0" column="0">
-       <item row="1" column="0">
+      <widget class="QLabel" name="label">
-        <widget class="QLabel" name="label_2">
+       <property name="text">
-         <property name="text">
+        <string>Catalog &amp;format:</string>
-          <string>Catalog &amp;title (existing catalog with the same title will be replaced):</string>
+       </property>
-         </property>
+       <property name="buddy">
-         <property name="wordWrap">
+        <cstring>format</cstring>
-          <bool>true</bool>
+       </property>
-         </property>
+      </widget>
-         <property name="buddy">
+     </item>
-          <cstring>title</cstring>
+     <item row="0" column="2">
-         </property>
+      <widget class="QComboBox" name="format"/>
-        </widget>
+     </item>
-       </item>
+     <item row="1" column="0">
-       <item row="2" column="1">
+      <widget class="QLabel" name="label_2">
-        <spacer name="verticalSpacer">
+       <property name="text">
-         <property name="orientation">
+        <string>Catalog &amp;title (existing catalog with the same title will be replaced):</string>
-          <enum>Qt::Vertical</enum>
+       </property>
-         </property>
+       <property name="wordWrap">
-         <property name="sizeHint" stdset="0">
+        <bool>true</bool>
-          <size>
+       </property>
-           <width>20</width>
+       <property name="buddy">
-           <height>299</height>
+        <cstring>title</cstring>
-          </size>
+       </property>
-         </property>
+      </widget>
-        </spacer>
+     </item>
-       </item>
+     <item row="1" column="2">
-       <item row="3" column="0">
+      <widget class="QLineEdit" name="title"/>
-        <widget class="QCheckBox" name="sync">
+     </item>
-         <property name="text">
+     <item row="3" column="0">
-          <string>&amp;Send catalog to device automatically</string>
+      <widget class="QCheckBox" name="sync">
-         </property>
+       <property name="text">
-        </widget>
+        <string>&amp;Send catalog to device automatically</string>
-       </item>
+       </property>
-       <item row="1" column="2">
+      </widget>
-        <widget class="QLineEdit" name="title"/>
+     </item>
-       </item>
+     <item row="2" column="1">
-      </layout>
+      <spacer name="verticalSpacer">
-     </widget>
+       <property name="orientation">
-    </widget>
+        <enum>Qt::Vertical</enum>
-   </item>
+       </property>
-   <item row="0" column="0">
+       <property name="sizeHint" stdset="0">
-    <widget class="QLabel" name="count">
+        <size>
-     <property name="font">
+         <width>20</width>
-      <font>
+         <height>299</height>
-       <weight>75</weight>
+        </size>
-       <bold>true</bold>
+       </property>
-      </font>
+      </spacer>
-     </property>
+     </item>
-     <property name="text">
+    </layout>
-      <string>Generate catalog for {0} books</string>
+   </widget>
-     </property>
+  </widget>
-    </widget>
+  <widget class="QLabel" name="count">
-   </item>
+   <property name="geometry">
-  </layout>
+    <rect>
     <x>12</x>
     <y>12</y>
     <width>205</width>
     <height>17</height>
    </rect>
   </property>
   <property name="font">
    <font>
     <weight>75</weight>
     <bold>true</bold>
    </font>
   </property>
   <property name="text">
    <string>Generate catalog for {0} books</string>
   </property>
  </widget>
 </widget>
 <resources>
  <include location="../../../work/calibre/resources/images.qrc"/>
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -532,7 +532,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            if self.cover_fetcher.exception is not None:
                err = self.cover_fetcher.exception
                error_dialog(self, _('Cannot fetch cover'),
-                    _('<b>Could not fetch cover.</b><br/>')+repr(err)).exec_()
+                    _('<b>Could not fetch cover.</b><br/>')+unicode(err)).exec_()
                return
            pix = QPixmap()
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@ -215,7 +215,7 @@ class TagsModel(QAbstractItemModel):
            return QModelIndex()
        child_item = index.internalPointer()
-        parent_item = child_item.parent
+        parent_item = getattr(child_item, 'parent', None)
        if parent_item is self.root_item or parent_item is None:
            return QModelIndex()
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -238,19 +238,36 @@ def fetch_scheduled_recipe(arg):
 def generate_catalog(parent, dbspec, ids):
    '''
    Show the catalog dialog and describe the catalog job to run.

    Returns None when the dialog is not accepted.  Otherwise returns the
    tuple (job function name, job arguments, job description, output file
    path, sync flag, catalog title).
    '''
    from calibre.gui2.dialogs.catalog import Catalog

    # Build the Catalog dialog in gui2.dialogs.catalog
    dialog = Catalog(parent, dbspec, ids)
    if dialog.exec_() != dialog.Accepted:
        return None

    # Create the output file
    out = PersistentTemporaryFile(
            suffix='_catalog_out.'+dialog.catalog_format.lower())

    # Retrieve plugin options: only when some tab title mentions the chosen
    # format, and then from each dialog.fmts entry registered for that format
    fmt_options = {}
    for tab_index in range(dialog.tabs.count()):
        tab_label = str(dialog.tabs.tabText(tab_index))
        if str(dialog.catalog_format) not in tab_label:
            continue
        for entry in dialog.fmts:
            if entry[0] == dialog.catalog_format:
                fmt_options = entry[2].options()

    args = [
        dialog.catalog_format,
        dialog.catalog_title,
        dbspec,
        ids,
        out.name,
        fmt_options
        ]
    out.close()

    # This calls gui2.convert.gui_conversion:gui_catalog()
    return ('gui_catalog', args, _('Generate catalog'), out.name,
            dialog.catalog_sync, dialog.catalog_title)
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 '''The main GUI'''
-import os, sys, textwrap, collections, time
+import atexit, os, shutil, sys, tempfile, textwrap, collections, time
 from xml.parsers.expat import ExpatError
 from Queue import Queue, Empty
 from threading import Thread
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        cm.addAction(_('Bulk convert'))
        cm.addSeparator()
        ac = cm.addAction(
-                _('Create catalog of the books in your calibre library'))
+                _('Create catalog of books in your calibre library'))
        ac.triggered.connect(self.generate_catalog)
        self.action_convert.setMenu(cm)
        self._convert_single_hook = partial(self.convert_ebook, bulk=False)
@ -1359,26 +1359,32 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
    ############################### Generate catalog ###########################
-    def generate_catalog(self):
+    def generate_catalog(self):    
        # Collect book ids from the library view, ask gui2.tools:generate_catalog()
        # for a job description and queue that job through self.job_manager;
        # self.catalog_generated is passed (wrapped in Dispatcher) with the job.
        rows = self.library_view.selectionModel().selectedRows()
        # With no selection (and, in the new condition, with fewer than two
        # selected rows) every row of the library view's model is used instead.
-        if not rows:
+        if not rows or len(rows) < 2:
            rows = xrange(self.library_view.model().rowCount(QModelIndex()))
        ids = map(self.library_view.model().id, rows)
        dbspec = None
        if not ids:
            return error_dialog(self, _('No books selected'),
                    _('No books selected to generate catalog for'),
                    show=True)
        # Calling gui2.tools:generate_catalog()
        ret = generate_catalog(self, dbspec, ids)
        # None means no job description was produced, so there is nothing to run
        if ret is None:
            return
        func, args, desc, out, sync, title = ret
        # Upper-cased extension of the output file, without the leading dot
        fmt = os.path.splitext(out)[1][1:].upper()
        job = self.job_manager.run_job(
                Dispatcher(self.catalog_generated), func, args=args,
                    description=desc)
        # Attach the output path, format, sync flag and title to the job object
        job.catalog_file_path = out
-        job.catalog_sync, job.catalog_title = sync, title
+        job.fmt = fmt
        job.catalog_sync, job.catalog_title = sync, title        
        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
    def catalog_generated(self, job):
@ -1392,8 +1398,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            dynamic.set('catalogs_to_be_synced', sync)
        self.status_bar.showMessage(_('Catalog generated.'), 3000)
        self.sync_catalogs()
-
+		if job.fmt in ['CSV','XML']:
-
+			export_dir = choose_dir(self, 'Export Catalog Directory', 
 										          'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
 			if export_dir:
 				destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
 				shutil.copyfile(job.catalog_file_path, destination)
    ############################### Fetch news #################################
    def download_scheduled_recipe(self, arg):
--- a/src/calibre/gui2/viewer/config.ui
+++ b/src/calibre/gui2/viewer/config.ui
@ -7,14 +7,14 @@
    <x>0</x>
    <y>0</y>
    <width>479</width>
-    <height>574</height>
+    <height>606</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Configure Ebook viewer</string>
  </property>
  <property name="windowIcon">
-   <iconset resource="../../../../resources/images.qrc">
+   <iconset>
    <normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
  </property>
  <layout class="QGridLayout" name="gridLayout_4">
@ -164,7 +164,7 @@
              </item>
             </widget>
            </item>
-            <item row="6" column="0" colspan="2">
+            <item row="7" column="0" colspan="2">
             <widget class="QCheckBox" name="opt_remember_window_size">
              <property name="text">
               <string>Remember last used &amp;window size</string>
@ -218,6 +218,13 @@
              </property>
             </widget>
            </item>
            <item row="6" column="0" colspan="2">
             <widget class="QCheckBox" name="opt_fit_images">
              <property name="text">
               <string>&amp;Resize images larger than the viewer window (needs restart)</string>
              </property>
             </widget>
            </item>
           </layout>
          </item>
          <item row="3" column="0">
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -10,7 +10,7 @@ from base64 import b64encode
 from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
                     QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
                     QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
-                     QFont, QObject, QApplication, pyqtSignature, QAction
+                     QFont, pyqtSignature, QAction
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 from calibre.utils.config import Config, StringConfig
@ -21,7 +21,7 @@ from calibre.constants import iswindows
 from calibre import prints, guess_type
 from calibre.gui2.viewer.keys import SHORTCUTS
-bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = None
+bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = images =None
 def load_builtin_fonts():
    base = P('fonts/liberation/*.ttf')
@ -42,6 +42,8 @@ def config(defaults=None):
              help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
    c.add_opt('max_view_width', default=6000,
            help=_('Maximum width of the viewer window, in pixels.'))
    c.add_opt('fit_images', default=True,
            help=_('Resize images larger than the viewer window to fit inside it'))
    c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
    c.add_opt('hyphenate_default_lang', default='en',
            help=_('Default language for hyphenation rules'))
@ -59,20 +61,6 @@ def config(defaults=None):
    return c
 class PythonJS(QObject):
    def __init__(self, callback):
        QObject.__init__(self, QApplication.instance())
        self.setObjectName("py_bridge")
        self._callback = callback
    @pyqtSignature("QString")
    def callback(self, msg):
        print "callback called"
        self._callback(msg)
 class ConfigDialog(QDialog, Ui_Dialog):
    def __init__(self, shortcuts, parent=None):
@ -110,6 +98,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
        self.shortcut_config = ShortcutConfig(shortcuts, parent=self)
        p = self.tabs.widget(1)
        p.layout().addWidget(self.shortcut_config)
        self.opt_fit_images.setChecked(opts.fit_images)
    def accept(self, *args):
@ -122,6 +111,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
        c.set('standard_font', {0:'serif', 1:'sans', 2:'mono'}[self.standard_font.currentIndex()])
        c.set('user_css', unicode(self.css.toPlainText()))
        c.set('remember_window_size', self.opt_remember_window_size.isChecked())
        c.set('fit_images', self.opt_fit_images.isChecked())
        c.set('max_view_width', int(self.max_view_width.value()))
        c.set('hyphenate', self.hyphenate.isChecked())
        idx = self.hyphenate_default_lang.currentIndex()
@ -157,7 +147,6 @@ class Document(QWebPage):
        self.setObjectName("py_bridge")
        self.debug_javascript = False
        self.current_language = None
        #self.js_bridge = PythonJS(self.js_callback)
        self.setLinkDelegationPolicy(self.DelegateAllLinks)
        self.scroll_marks = []
@ -197,9 +186,14 @@ class Document(QWebPage):
        opts = config().parse()
        self.hyphenate = opts.hyphenate
        self.hyphenate_default_lang = opts.hyphenate_default_lang
        self.do_fit_images = opts.fit_images
    def fit_images(self):
        '''
        Run the javascript call ``setup_image_scaling_handlers()`` through
        self.javascript, unless self.do_fit_images is false.
        '''
        if not self.do_fit_images:
            return
        self.javascript('setup_image_scaling_handlers()')
    def load_javascript_libraries(self):
-        global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator
+        global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator, images
        self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        if jquery is None:
            jquery = P('content_server/jquery.js', data=True)
@ -215,6 +209,9 @@ class Document(QWebPage):
        if referencing is None:
            referencing = P('viewer/referencing.js', data=True)
        self.javascript(referencing)
        if images is None:
            images = P('viewer/images.js', data=True)
        self.javascript(images)
        if hyphenation is None:
            hyphenation = P('viewer/hyphenation.js', data=True)
        self.javascript(hyphenation)
@ -353,7 +350,13 @@ class Document(QWebPage):
        return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
    def set_bottom_padding(self, amount):
        # Set the bottom padding of the document body to `amount` pixels via
        # jQuery. The new version only issues the write when the value read
        # back from the page differs from the requested one.
-        self.javascript('$("body").css("padding-bottom", "%dpx")' % amount)
+        padding = '%dpx'%amount
        try:
            old_padding = unicode(self.javascript('$("body").css("padding-bottom")').toString())
        except:
            # NOTE(review): bare except; any failure while reading the current
            # padding falls back to '' so the padding below is (re)applied.
            old_padding = ''
        if old_padding != padding:
            self.javascript('$("body").css("padding-bottom", "%s")' % padding)
 class EntityDeclarationProcessor(object):
@ -541,6 +544,7 @@ class DocumentView(QWebView):
            return
        self.loading_url = None
        self.document.set_bottom_padding(0)
        self.document.fit_images()
        self._size_hint = self.document.mainFrame().contentsSize()
        scrolled = False
        if self.to_bottom:
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -40,8 +40,9 @@ class CSV_XML(CatalogPlugin):
        from calibre.utils.logging import Log
        log = Log()
-        self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
+        self.fmt = path_to_output.rpartition('.')[2]
-        if opts.verbose:
+        
        if False and opts.verbose:
            log("%s:run" % self.name)
            log(" path_to_output: %s" % path_to_output)
            log(" Output format: %s" % self.fmt)
@ -53,7 +54,7 @@ class CSV_XML(CatalogPlugin):
            log(" opts:")
            for key in keys:
                log("  %s: %s" % (key, opts_dict[key]))
-
+		
        # Get the sorted, filtered database as a dictionary
        data = self.search_sort_db(db, opts)
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -644,6 +644,10 @@ def catalog_option_parser(args):
    output, fmt = validate_command_line(parser, args, log)
    # Add options common to all catalog plugins
    parser.add_option('-i', '--ids', default=None, dest='ids',
                      help=_("Comma-separated list of database IDs to catalog.\n"
                      "If declared, --search is ignored.\n"
                             "Default: all"))
    parser.add_option('-s', '--search', default=None, dest='search_text',
                      help=_("Filter the results by the search query. "
                          "For the format of the search query, please see "
@ -656,31 +660,6 @@ def catalog_option_parser(args):
    # Add options specific to fmt plugin
    plugin = add_plugin_parser_options(fmt, parser, log)
    # Merge options from GUI Preferences
    '''
    # Placeholder sample code until we implement GUI preferences
    from calibre.library.save_to_disk import config
    c = config()
    for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
        opt = c.get_option(pref)
        switch = '--dont-'+pref.replace('_', '-')
        parser.add_option(switch, default=True, action='store_false',
                help=opt.help+' '+_('Specifying this switch will turn '
                    'this behavior off.'), dest=pref)
    for pref in ['timefmt', 'template', 'formats']:
        opt = c.get_option(pref)
        switch = '--'+pref
        parser.add_option(switch, default=opt.default,
                help=opt.help, dest=pref)
    for pref in ('replace_whitespace', 'to_lowercase'):
        opt = c.get_option(pref)
        switch = '--'+pref.replace('_', '-')
        parser.add_option(switch, default=False, action='store_true',
                help=opt.help)
    '''
    return parser, plugin, log
 def command_catalog(args, dbpath):
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
        return 1
    if opts.verbose:
        log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
    if opts.ids:
        opts.ids = [int(id) for id in opts.ids.split(',')]    
    with plugin:
        plugin.run(args[1], opts, get_db(dbpath, opts))
    return 0
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
        for i in iter(self):
            yield i[x]
-    def get_data_as_dict(self, prefix=None, authors_as_string=False):
+    def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
        '''
        Return all metadata stored in the database as a dict. Includes paths to
        the cover and each format.
        :param prefix: The prefix for all paths. By default, the prefix is the absolute path
        to the library folder.
        :param ids: Set of ids to return the data for. If None return data for
        all entries in database.
        '''
        if prefix is None:
            prefix = self.library_path
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
        data = []
        for record in self.data:
            if record is None: continue
            db_id = record[FIELD_MAP['id']]
            if ids is not None and db_id not in ids:
                continue
            x = {}
            for field in FIELDS:
                x[field] = record[FIELD_MAP[field]]
            data.append(x)
-            x['id'] = record[FIELD_MAP['id']]
+            x['id'] = db_id
            x['formats'] = []
            if not x['authors']:
                x['authors'] = _('Unknown')
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@ -524,6 +524,7 @@ class DynamicConfig(dict):
                    pass
                except:
                    import traceback
                    print 'Failed to unpickle stored object:'
                    traceback.print_exc()
                    d = {}
        self.clear()
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@ -104,6 +104,7 @@ _extra_lang_codes = {
        'en_CY' : _('English (Cyprus)'),
        'en_PK' : _('English (Pakistan)'),
        'en_SG' : _('English (Singapore)'),
        'en_YE' : _('English (Yemen)'),
        'de_AT' : _('German (AT)'),
        'nl'    : _('Dutch (NL)'),
        'nl_BE' : _('Dutch (BE)'),
--- a/src/calibre/utils/resources.py
+++ b/src/calibre/utils/resources.py
@ -9,9 +9,22 @@ __docformat__ = 'restructuredtext en'
 import __builtin__, sys, os
 # Optional development override: when CALIBRE_DEVELOP_FROM is set, _dev_path
 # is the absolute 'resources' directory inside the parent directory of the
 # path it names. It is None when the variable is unset or that directory
 # does not exist.
 _dev_path = os.environ.get('CALIBRE_DEVELOP_FROM')
 if _dev_path is not None:
    _dev_path = os.path.join(
        os.path.abspath(os.path.dirname(_dev_path)), 'resources')
 if _dev_path is not None and not os.path.exists(_dev_path):
    _dev_path = None
 def get_path(path, data=False):
    # Resolve the resource `path` (components separated by '/' or os.sep) to
    # a filesystem path. When `data` is true, return the contents of that
    # file (read in binary mode) instead of the path.
    global _dev_path
    path = path.replace(os.sep, '/')
-    path = os.path.join(sys.resources_location, *path.split('/'))
+    base = None
    # The development directory is used only for resources that exist in it
    if _dev_path is not None:
        if os.path.exists(os.path.join(_dev_path, *path.split('/'))):
            base = _dev_path
    # Otherwise fall back to sys.resources_location
    if base is None:
        base = sys.resources_location
    path = os.path.join(base, *path.split('/'))
    if data:
        # NOTE(review): the file object is not explicitly closed here
        return open(path, 'rb').read()
    return path
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -357,9 +357,17 @@ class BasicNewsRecipe(Recipe):
        Override in a subclass to customize extraction of the :term:`URL` that points
        to the content for each article. Return the
        article URL. It is called with `article`, an object representing a parsed article
-        from a feed. See `feedsparser <http://www.feedparser.org/docs/>`_.
+        from a feed. See `feedparser <http://www.feedparser.org/docs/>`_.
-        By default it returns `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
+        By default it looks for the original link (for feeds syndicated via a
        service like feedburner or pheedo) and if found,
        returns that or else returns
        `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
        '''
        for key in article.keys():
            if key.endswith('_origlink'):
                url = article[key]
                if url and url.startswith('http://'):
                    return url
        return article.get('link',  None)
    def preprocess_html(self, soup):