From 8bb493275e2bcd15b0770669eae5c7b1964e65e1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 23 Jun 2010 11:28:22 -0600 Subject: [PATCH] Fix #5953 (New recipe for london review of books) --- resources/images/news/lrb.png | Bin 0 -> 315 bytes resources/images/news/lrb_payed.png | Bin 0 -> 315 bytes resources/recipes/lrb.recipe | 40 ++++++++------- resources/recipes/lrb_payed.recipe | 75 ++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 resources/images/news/lrb.png create mode 100644 resources/images/news/lrb_payed.png create mode 100644 resources/recipes/lrb_payed.recipe diff --git a/resources/images/news/lrb.png b/resources/images/news/lrb.png new file mode 100644 index 0000000000000000000000000000000000000000..da966d6a1ac856337112794325ee8ae9d1ed3e1c GIT binary patch literal 315 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!60wlNoGJgf6n3BBRT^Rni_n+Ah2>S z4={E+nQaFWEGuwK2hw1@3^B*n9tLtUJY5_^G|q3GcvAF$0tf4H8|$W7OVp|_%{rsl zrjllT^5tee8>g(ATTGj|l~UGNOETg8b7 zGE(Y#&*OH`Z?jX@anVb=wI6k}{ru@V<>mee#s_Rt>RXRZl?U3XTH+c}l9E`GYL#4+ z3Zxi}3=9o)4a{_nj6)0!t&9w<3`~GrD+7a9(oE(k8glbfGSeziG#FVKnOGT`K{U*s Sz_Sggfx*+&&t;ucLK6U2mS5KZ literal 0 HcmV?d00001 diff --git a/resources/images/news/lrb_payed.png b/resources/images/news/lrb_payed.png new file mode 100644 index 0000000000000000000000000000000000000000..da966d6a1ac856337112794325ee8ae9d1ed3e1c GIT binary patch literal 315 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!60wlNoGJgf6n3BBRT^Rni_n+Ah2>S z4={E+nQaFWEGuwK2hw1@3^B*n9tLtUJY5_^G|q3GcvAF$0tf4H8|$W7OVp|_%{rsl zrjllT^5tee8>g(ATTGj|l~UGNOETg8b7 zGE(Y#&*OH`Z?jX@anVb=wI6k}{ru@V<>mee#s_Rt>RXRZl?U3XTH+c}l9E`GYL#4+ z3Zxi}3=9o)4a{_nj6)0!t&9w<3`~GrD+7a9(oE(k8glbfGSeziG#FVKnOGT`K{U*s Sz_Sggfx*+&&t;ucLK6U2mS5KZ literal 0 HcmV?d00001 diff --git a/resources/recipes/lrb.recipe b/resources/recipes/lrb.recipe index 0076b3e697..4a203c80ae 100644 --- a/resources/recipes/lrb.recipe +++ b/resources/recipes/lrb.recipe @@ -1,6 +1,6 @@ __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' lrb.co.uk ''' @@ -8,32 +8,38 @@ lrb.co.uk from calibre.web.feeds.news import BasicNewsRecipe class LondonReviewOfBooks(BasicNewsRecipe): - title = u'London Review of Books' - __author__ = u'Darko Miletic' - description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' - category = 'news, literature, England' - publisher = 'London Review of Books' - oldest_article = 7 + title = 'London Review of Books (free)' + __author__ = 'Darko Miletic' + description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' + category = 'news, literature, UK' + publisher = 'LRB ltd.' + oldest_article = 15 max_articles_per_feed = 100 language = 'en_GB' no_stylesheets = True use_embedded_content = False encoding = 'utf-8' + publication_type = 'magazine' + masthead_url = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif' + extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} ' - conversion_options = { + conversion_options = { 'comments' : description ,'tags' : category ,'language' : language ,'publisher' : publisher } - - keep_only_tags = [dict(name='div' , attrs={'id' :'main'})] - remove_tags = [ - dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']}) - ,dict(name='div' , attrs={'id' :['mainmenu','precontent','otherarticles'] }) - ,dict(name='span', attrs={'class':['inlineright','article-icons']}) - ,dict(name='ul' , attrs={'class':'article-controls'}) - ,dict(name='p' , attrs={'class':'meta-info' }) - ] + + keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})] + remove_attributes = ['width','height'] feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')] + + def get_cover_url(self): + cover_url = None + soup = self.index_to_soup('http://www.lrb.co.uk/') + cover_item = soup.find('p',attrs={'class':'cover'}) + if cover_item: + cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src'] + return cover_url + diff --git a/resources/recipes/lrb_payed.recipe b/resources/recipes/lrb_payed.recipe new file mode 100644 index 0000000000..4888f61cb6 --- /dev/null +++ b/resources/recipes/lrb_payed.recipe @@ -0,0 +1,75 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +lrb.co.uk +''' +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class LondonReviewOfBooksPayed(BasicNewsRecipe): + title = 'London Review of Books' + __author__ = 'Darko Miletic' + description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' + category = 'news, literature, UK' + publisher = 'LRB Ltd.' + max_articles_per_feed = 100 + language = 'en_GB' + no_stylesheets = True + delay = 1 + use_embedded_content = False + encoding = 'utf-8' + INDEX = 'http://www.lrb.co.uk' + LOGIN = INDEX + '/login' + masthead_url = INDEX + '/assets/images/lrb_logo_big.gif' + needs_subscription = True + publication_type = 'magazine' + extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} ' + + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open(self.LOGIN) + br.select_form(nr=1) + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + articles = [] + soup = self.index_to_soup(self.INDEX) + cover_item = soup.find('p',attrs={'class':'cover'}) + lrbtitle = self.title + if cover_item: + self.cover_url = self.INDEX + cover_item.a.img['src'] + content = self.INDEX + cover_item.a['href'] + soup2 = self.index_to_soup(content) + sitem = soup2.find(attrs={'class':'article-list'}) + lrbtitle = soup2.head.title.string + for item in sitem.findAll('a',attrs={'class':'title'}): + description = u'' + title_prefix = u'' + feed_link = item + if feed_link.has_key('href'): + url = self.INDEX + feed_link['href'] + title = title_prefix + self.tag_to_string(feed_link) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + return [(lrbtitle, articles)] + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})] + remove_attributes = ['width','height']