From b5247098098005bc1efb548f3d2eb39e12199354 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Nov 2014 13:06:17 +0530 Subject: [PATCH] Update Defense News --- recipes/defensenews.recipe | 50 ++++++++++++++------------------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/recipes/defensenews.recipe b/recipes/defensenews.recipe index a521529981..5bac7727de 100644 --- a/recipes/defensenews.recipe +++ b/recipes/defensenews.recipe @@ -11,52 +11,38 @@ class DefenseNews(BasicNewsRecipe): __author__ = 'Darko Miletic' description = 'Find late-breaking defense news from the leading defense news weekly' publisher = 'Gannett Government Media Corporation' - category = 'defense news, defence news, defense, defence, defence budget, defence policy' oldest_article = 31 max_articles_per_feed = 200 no_stylesheets = True - encoding = 'utf8' + encoding = 'cp1252' use_embedded_content = False language = 'en' remove_empty_feeds = True publication_type = 'newspaper' + ignore_duplicate_articles = {'url'} masthead_url = 'http://www.defensenews.com/images/logo_defensenews2.jpg' extra_css = """ body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em; display:block} .info{font-size: small; color: gray} """ + remove_attributes = ['style', 'lang'] - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } - + keep_only_tags = [ + dict(attrs={'class':['ody-hgroup', 'ody-article']}), + ] remove_tags = [ - dict(name=['meta','link']) - ,dict(attrs={'class':['toolbar','related','left','right']}) - ] - remove_tags_before = attrs={'class':'storyWrp'} - remove_tags_after = attrs={'class':'middle'} - - remove_attributes=['lang'] + dict(name=['meta','link']), + dict(attrs={'class':['toolbar', 'toolsShareWrap', 'ody-bo-sm ', 'ody-comments', 'ody-related-links','left','right']}), + dict(id=['factsMore', 'ody-nextstoryslider']), + ] feeds = [ - (u'Europe' , u'http://www.defensenews.com/rss/eur/' ) - ,(u'Americas', u'http://www.defensenews.com/rss/ame/' ) - ,(u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asi/' ) - ,(u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/') - ,(u'Air', u'http://www.defensenews.com/rss/air/' ) - ,(u'Land', u'http://www.defensenews.com/rss/lan/' ) - ,(u'Naval', u'http://www.defensenews.com/rss/sea/' ) - ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll('img'): - if not item.has_key('alt'): - item['alt'] = 'image' - return soup + (u'Europe' , u'http://www.defensenews.com/rss/europe') + ,(u'Americas', u'http://www.defensenews.com/rss/americas') + ,(u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asia-pacific-rim') + ,(u'Middle east & Africa', u'http://www.defensenews.com/rss/middle-east-africa') + ,(u'Air', u'http://www.defensenews.com/rss/air-warfare') + ,(u'Land', u'http://www.defensenews.com/rss/land-warfare') + ,(u'Naval', u'http://www.defensenews.com/rss/naval-warfare') + ]