Fix #5953 (New recipe for London Review of Books)

2025-08-30 23:00:21 -04:00 · 2010-06-23 11:28:22 -06:00 · 2010-06-23 11:28:22 -06:00 · 8bb493275e
commit 8bb493275e
parent d0e85129ea
4 changed files with 98 additions and 17 deletions
--- a/resources/images/news/lrb.png
+++ b/resources/images/news/lrb.png
--- a/resources/images/news/lrb_payed.png
+++ b/resources/images/news/lrb_payed.png
--- a/resources/recipes/lrb.recipe
+++ b/resources/recipes/lrb.recipe
@ -1,6 +1,6 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lrb.co.uk
 '''
@ -8,32 +8,38 @@ lrb.co.uk
 from calibre.web.feeds.news import BasicNewsRecipe
 class LondonReviewOfBooks(BasicNewsRecipe):
-    title                 = u'London Review of Books'
+    title                 = 'London Review of Books (free)'
-    __author__            = u'Darko Miletic'
+    __author__            = 'Darko Miletic'
-    description           = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
+    description           = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
-    category              = 'news, literature, England'
+    category              = 'news, literature, UK'
-    publisher             = 'London Review of Books'
+    publisher             = 'LRB ltd.'
-    oldest_article        = 7
+    oldest_article        = 15
    max_articles_per_feed = 100
    language              = 'en_GB'
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    publication_type      = 'magazine'
    masthead_url          = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
    extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
-    conversion_options = {  
+    conversion_options = {
                             'comments'  : description
                            ,'tags'      : category
                            ,'language'  : language
                            ,'publisher' : publisher
                         }
-    
+
-    keep_only_tags = [dict(name='div' , attrs={'id'   :'main'})]
+    keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})]
-    remove_tags = [
+    remove_attributes = ['width','height']
                    dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
                   ,dict(name='div' , attrs={'id'   :['mainmenu','precontent','otherarticles']     })
                   ,dict(name='span', attrs={'class':['inlineright','article-icons']})
                   ,dict(name='ul'  , attrs={'class':'article-controls'})
                   ,dict(name='p'   , attrs={'class':'meta-info'       })
                  ]
    feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup('http://www.lrb.co.uk/')
        cover_item = soup.find('p',attrs={'class':'cover'})
        if cover_item:
           cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src']
        return cover_url
--- a/resources/recipes/lrb_payed.recipe
+++ b/resources/recipes/lrb_payed.recipe
@ -0,0 +1,75 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lrb.co.uk
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class LondonReviewOfBooksPayed(BasicNewsRecipe):
    """Recipe for the subscription content of the London Review of Books.
    Logs in to lrb.co.uk when credentials are configured and builds its
    article list from the issue page that the home-page cover links to.
    """
    title                 = 'London Review of Books'
    __author__            = 'Darko Miletic'
    description           = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
    category              = 'news, literature, UK'
    publisher             = 'LRB Ltd.'
    max_articles_per_feed = 100
    language              = 'en_GB'
    no_stylesheets        = True
    delay                 = 1
    use_embedded_content  = False
    encoding              = 'utf-8'
    # Site root; every relative link scraped from the pages is joined to it.
    INDEX                 = 'http://www.lrb.co.uk'
    LOGIN                 = INDEX + '/login'
    masthead_url          = INDEX + '/assets/images/lrb_logo_big.gif'
    needs_subscription    = True
    publication_type      = 'magazine'
    extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
    # Metadata handed to the conversion step, mirroring the fields above.
    conversion_options = {
                             'comments'  : description
                            ,'tags'      : category
                            ,'language'  : language
                            ,'publisher' : publisher
                         }
    keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
    remove_attributes = ['width','height']
    def get_browser(self):
        """Return a browser, logged in to the site when credentials are set.
        The login form is submitted only when both ``self.username`` and
        ``self.password`` are not None; otherwise the plain browser is returned.
        """
        # Pass self explicitly: calling the base method through the class with
        # no argument raises TypeError when get_browser is an instance method.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            # NOTE(review): nr=1 presumably selects the second form on the
            # login page -- confirm against the current page markup.
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
    def parse_index(self):
        """Build the feed list from the current issue's article list.
        Returns a one-element list ``[(issue_title, articles)]``. Each article
        is a dict with 'title', 'date', 'url' and 'description' keys. The
        issue title falls back to ``self.title`` and the article list stays
        empty when the home page has no cover block. Sets ``self.cover_url``
        as a side effect when the cover block is found.
        """
        articles = []
        lrbtitle = self.title
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('p',attrs={'class':'cover'})
        if cover_item:
            self.cover_url = self.INDEX + cover_item.a.img['src']
            # The cover links to the issue page that carries the article list.
            issue_soup = self.index_to_soup(self.INDEX + cover_item.a['href'])
            lrbtitle = issue_soup.head.title.string
            toc = issue_soup.find(attrs={'class':'article-list'})
            # An issue page without an article list yields no articles rather
            # than an AttributeError on None.
            links = [] if toc is None else toc.findAll('a',attrs={'class':'title'})
            # Every entry carries the same download date; format it once.
            date = strftime(self.timefmt)
            for link in links:
                href = link.get('href')
                if href is None:
                    # Anchor without a target: nothing to download.
                    continue
                articles.append({
                                  'title'      :self.tag_to_string(link)
                                 ,'date'       :date
                                 ,'url'        :self.INDEX + href
                                 ,'description':u''
                                })
        return [(lrbtitle, articles)]