Update London Review of Books (subscription)

2025-11-29 01:35:02 -05:00 · 2012-09-19 09:16:02 +05:30 · 2012-09-19 09:16:02 +05:30 · a7d24e2b64
commit a7d24e2b64
parent fabe29c577
1 changed file with 18 additions and 8 deletions
--- a/recipes/lrb_payed.recipe
+++ b/recipes/lrb_payed.recipe
@ -1,15 +1,15 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lrb.co.uk
 '''
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class LondonReviewOfBooksPayed(BasicNewsRecipe):
    title                 = 'London Review of Books'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Rich Shang, Darko Miletic'
    description           = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
    category              = 'news, literature, UK'
    publisher             = 'LRB Ltd.'
@ -41,9 +41,12 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('p',attrs={'class':'cover'})
        dates = str(soup.find('span', attrs={'class':'coverdate'}))
        newdates = re.sub('\<.*\>','',re.split('<br />',dates)[1])
        self.timefmt = ' [%s]'%newdates
        lrbtitle = self.title
        if  cover_item:
-            self.cover_url = self.INDEX + cover_item.a.img['src']
+            self.cover_url = re.sub('/m/','/l/',cover_item.a.img['src'])
            content = self.INDEX + cover_item.a['href']
            soup2 = self.index_to_soup(content)
            sitem = soup2.find(attrs={'class':'article-list'})
@ -54,13 +57,20 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
                feed_link = item
                if feed_link.has_key('href'):
                    url   = self.INDEX + feed_link['href']
-                    title = title_prefix + self.tag_to_string(feed_link)
+                    title_link = re.split('<br />',str(feed_link))
                    if len (title_link) > 1:
                        title = title_prefix + re.sub('\<.*\>','',title_link[0]) + ' - ' + re.sub('\<.*\>','',title_link[1])
                    else:
                        title = title_prefix + self.tag_to_string(feed_link)
                    desc = item.findNext('li')
                    if desc is not None and desc.find('cite') is not None and desc.find('ul') is None:
                        description=self.tag_to_string(desc)
                    date  = strftime(self.timefmt)
                    articles.append({
-                                      'title'      :title
+                                        'title'      :title
-                                     ,'date'       :date
+                                        ,'date'       :date
-                                     ,'url'        :url
+                                        ,'url'        :url
-                                     ,'description':description
+                                        ,'description':description
                                    })
        return [(lrbtitle, articles)]