diff --git a/resources/recipes/the_week_magazine_free.recipe b/resources/recipes/the_week_magazine_free.recipe index 1bac4133e7..6e033eaf82 100644 --- a/resources/recipes/the_week_magazine_free.recipe +++ b/resources/recipes/the_week_magazine_free.recipe @@ -1,17 +1,19 @@ - __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010, JOlo' ''' www.theweek.com ''' from calibre.web.feeds.news import BasicNewsRecipe +import re -class TheWeekFree(BasicNewsRecipe): - title = 'The Week Magazine - Free content' - __author__ = 'Darko Miletic' +class TheWeek(BasicNewsRecipe): + title = 'The Week Magazine' + __author__ = 'Jim Olo' description = "The best of the US and international media. Daily coverage of commentary and analysis of the day's events, as well as arts, entertainment, people and gossip, and political cartoons." publisher = 'The Week Publications, Inc.' + masthead_url = 'http://test.theweek.com/images/logo_theweek.gif' + cover_url = masthead_url category = 'news, politics, USA' oldest_article = 7 max_articles_per_feed = 100 @@ -19,31 +21,27 @@ class TheWeekFree(BasicNewsRecipe): encoding = 'utf-8' use_embedded_content = False language = 'en' + preprocess_regexps = [(re.compile(r'

', re.DOTALL), lambda match: '')] + remove_tags_before = dict(name='h1') + remove_tags_after = dict(name='div', attrs={'class':'articleSubscribe4free'}) + remove_tags = [ + dict(name='div', attrs={'class':['floatLeft','imageCaption','slideshowImageAttribution','postDate','utilities','cartoonInfo','left','middle','col300','articleSubscribe4free',' articleFlyout','articleFlyout floatRight','fourFreeBar']}) + ,dict(name='div', attrs={'id':['cartoonThumbs','rightColumn','header','partners']}) + ,dict(name='ul', attrs={'class':['slideshowNav','hotTopicsList topicList']}) + ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + filter_regexps = [r'www\.palmcoastdata\.com'] - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } - - keep_only_tags = [ - dict(name=['h1','h2']) - , dict(name='div', attrs={'class':'basefont'}) - , dict(name='div', attrs={'id':'slideshowLoader'}) - ] - - remove_tags = [ - dict(name='div', attrs={'id':['digg_dugg','articleRight','dateHeader']}) - ,dict(name=['object','embed','iframe']) - ] - - - feeds = [ - (u'News & Opinions' , u'http://www.theweek.com/section/index/news_opinion.rss') - ,(u'Arts & Leisure' , u'http://www.theweek.com/section/index/arts_leisure.rss') - ,(u'Business' , u'http://www.theweek.com/section/index/business.rss' ) - ,(u'Cartoon & Short takes' , u'http://www.theweek.com/section/index/cartoons_wit.rss') - ] - + feeds = [ + (u'News-Opinion', u'http://theweek.com/section/index/news_opinion.rss'), + (u'Business', u'http://theweek.com/section/index/business.rss'), + (u'Arts-Life', u'http://theweek.com/section/index/arts_life.rss'), + (u'Cartoons', u'http://theweek.com/section/index/cartoon_wit/0/all-cartoons.rss') + ]