diff --git a/recipes/infoworld.recipe b/recipes/infoworld.recipe index 26b6445e9e..23d3cacf64 100644 --- a/recipes/infoworld.recipe +++ b/recipes/infoworld.recipe @@ -18,17 +18,8 @@ class Engadget(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + auto_cleanup = True - remove_tags = [dict(name='div', attrs={'class': ["articleTools clearfix", "relatedContent", "pagination clearfix", "addResources"]}), - dict(name='div', attrs={'id': ["post-socialPromoBlock"]})] - - keep_only_tags = [dict(name='div', attrs={'class': ["article"]})] - - feeds = [(u'Top Tech Stories', u'http://infoworld.com/homepage/feed'), - (u'Today\'s Tech Headlines', u'http://www.infoworld.com/news/feed')] - - def get_article_url(self, article): - - url = article.get('link', None) - - return url + feeds = [(u'Top Tech Stories', u'http://www.infoworld.com/index.rss'), + (u'Today\'s Tech Headlines', + u'http://www.infoworld.com/news/index.rss')] diff --git a/recipes/journalgazette.recipe b/recipes/journalgazette.recipe index 1850af2970..81fb6d1b9b 100644 --- a/recipes/journalgazette.recipe +++ b/recipes/journalgazette.recipe @@ -17,37 +17,14 @@ class AdvancedUserRecipe1283666183(BasicNewsRecipe): no_stylesheets = True remove_javascript = True use_embedded_content = False - keep_only_tags = [dict(name='div', attrs={'id': 'mainContent'})] - extra_css = '#copyinfo { font-size: 6 ;} \n #photocredit { font-size: 6 ;} \n .pubinfo { font-size: 6 ;}' - masthead_url = 'http://www.journalgazette.net/img/icons/jgmini.gif' -# cover_url = 'http://www.journalgazette.net/img/icons/jgmini.gif' - encoding = 'cp1252' + auto_cleanup = True - feeds = [(u'Opinion', u'http://journalgazette.net/apps/pbcs.dll/section?Category=EDIT&template=blogrss&mime=xml'), - (u'Local News', u'http://journalgazette.net/apps/pbcs.dll/section?Category=LOCAL&template=blogrss&mime=xml'), - (u'Sports', u'http://journalgazette.net/apps/pbcs.dll/section?Category=SPORTS&template=blogrss&mime=xml'), - (u'Features', u'http://journalgazette.net/apps/pbcs.dll/section?Category=FEAT&template=blogrss&mime=xml'), - (u'Business', u'http://journalgazette.net/apps/pbcs.dll/section?Category=BIZ&template=blogrss&mime=xml'), - (u'Ice Chips', u'http://journalgazette.net/apps/pbcs.dll/section?Category=BLOGS11&template=blogrss&mime=xml '), - (u'Entertainment', u'http://journalgazette.net/apps/pbcs.dll/section?Category=ENT&template=blogrss&mime=xml'), - (u'Food', u'http://journalgazette.net/apps/pbcs.dll/section?Category=FOOD&template=blogrss&mime=xml') + feeds = [(u'Opinion', u'http://www.journalgazette.net/opinion/rss/'), + (u'Local News', u'http://www.journalgazette.net/news/local/rss/'), + (u'Sports', u'http://www.journalgazette.net/sports/rss/'), + (u'Business', u'http://www.journalgazette.net/business/rss/'), + (u'Entertainment', + u'http://www.journalgazette.net/entertainment/rss/'), + (u'Food', u'http://www.journalgazette.net/food/'), + (u'Blogs', u'http://www.journalgazette.net/blog/rss/'), ] - - def print_version(self, url): - split1 = url.split("/") - # print 'THE SPLIT IS: ', split1 - url3 = split1[2] - url5 = split1[4] - url6 = split1[5] - url7 = split1[6] - - print_url = 'http://' + url3 + '/apps/pbcs.dll/article?AID=/' + \ - url5 + '/' + url6 + '/' + url7 + '/-1/EDIT01&template=printart' - # print 'THIS URL WILL PRINT: ', print_url # this is a test string to - # see what the url is it will return - return print_url - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/jp_dk.recipe b/recipes/jp_dk.recipe index b64b57231e..65881cca5a 100644 --- a/recipes/jp_dk.recipe +++ b/recipes/jp_dk.recipe @@ -1,4 +1,3 @@ - __license__ = 'GPL v3' __copyright__ = '2010, Darko Miletic ' ''' @@ -18,31 +17,12 @@ class JP_dk(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - encoding = 'cp1252' language = 'da' - - extra_css = ' body{font-family: Arial,Verdana,Helvetica,Geneva,sans-serif } h1{font-family: Times,Georgia,Verdana,serif } ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } + auto_cleanup = True feeds = [ - - (u'Tophistorier', u'http://www.jp.dk/rss/topnyheder.jsp'), - (u'Seneste nyt', u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste'), - (u'Indland', u'http://www.jp.dk/rss/indland.jsp'), - (u'Udland', u'http://www.jp.dk/rss/udland.jsp'), - (u'Ny viden', u'http://www.jp.dk/rss/nyviden.jsp'), - (u'Timeout', u'http://www.jp.dk/rss/timeout.jsp'), - (u'Kultur', u'http://www.jp.dk/rss/kultur.jsp'), - (u'Sport', u'http://www.jp.dk/rss/sport.jsp') + (u'Tophistorier', u'http://www.jp.dk/rss/topnyheder.jsp'), + (u'Seneste nyt', + u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste'), + (u'Indland', u'http://www.jp.dk/rss/indland.jsp'), ] - - remove_tags = [ - dict(name=['object', 'link']), dict( - name='p', attrs={'class': 'artByline'}) - ] - - def print_version(self, url): - return url + '?service=printversion' diff --git a/recipes/jpost.recipe b/recipes/jpost.recipe index 4acbc8a863..aa7cebaaab 100644 --- a/recipes/jpost.recipe +++ b/recipes/jpost.recipe @@ -15,9 +15,10 @@ class JerusalemPost(BasicNewsRecipe): no_stylesheets = True feeds = [('Front Page', 'http://www.jpost.com/Rss/RssFeedsFrontPage.aspx'), - ('Israel News', 'http://www.jpost.com/Rss/RssFeedsIsraelNews.aspx'), - ('Middle East News', 'http://www.jpost.com/Rss/RssFeedsMiddleEastNews.aspx'), + ('Israel News', + 'http://www.jpost.com/Rss/RssFeedsIsraelNews.aspx'), + ('Middle East News', + 'http://www.jpost.com/Rss/RssFeedsMiddleEastNews.aspx'), ('International News', - 'http://www.jpost.com/Rss/RssFeedsInternationalNews.aspx'), - ('Editorials', 'http://www.jpost.com/Rss/RssFeedsEditorialsNews.aspx'), + 'http://www.jpost.com/Rss/RssFeedsPolitiqueetsocial.aspx'), ] diff --git a/recipes/las_vegas_review.recipe b/recipes/las_vegas_review.recipe index 566b2a8c5c..47e9a587af 100644 --- a/recipes/las_vegas_review.recipe +++ b/recipes/las_vegas_review.recipe @@ -18,7 +18,6 @@ class AdvancedUserRecipe1274742400(BasicNewsRecipe): (u'Top Stories', u'http://www.reviewjournal.com/rss.xml'), (u'News', u'http://www.reviewjournal.com/news/feed'), (u'Business', u'http://www.reviewjournal.com/business/feed'), - (u'Living', u'http://www.reviewjournal.com/living/feed'), (u'Opinion', u'http://www.reviewjournal.com/opinion/feed'), (u'Neon', u'http://www.reviewjournal.com/neon/feed'), (u'Sports', u'http://www.reviewjournal.com/sports/feed')] diff --git a/recipes/mdj.recipe b/recipes/mdj.recipe index c50951e6ad..d454e2b5dd 100644 --- a/recipes/mdj.recipe +++ b/recipes/mdj.recipe @@ -12,33 +12,13 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True remove_javascript = True - - masthead_url = 'http://assets.matchbin.com/sites/624/assets/logo.gif' - - keep_only_tags = [ - dict(name='div', attrs={'id': ['print_content_container']}) - - ] + auto_cleanup = True feeds = [ - ('Local News', 'http://mdjonline.com/rss/rss/Local+News?content_type=article&tags=news&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Local+News'), # noqa - ('Sports', 'http://mdjonline.com/rss/rss/Sports?content_type=article&tags=sports&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Sports'), - ('Obits', 'http://mdjonline.com/rss/rss/Obits?content_type=article&tags=obits&page_name=rss&tag_inclusion=or&offset=0&limit=20&instance=Obits'), - ('Editorial & oped', 'http://mdjonline.com/rss/rss/Editorial+and+OPED?content_type=article&tags=oped+editorial&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Editorial+and+OPED'), # noqa - ('Lifestyle', 'http://mdjonline.com/rss/rss/Lifestyle?content_type=article&tags=lifestyle&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Lifestyle'), # noqa - ('Blogs', 'http://mdjonline.com/rss/rss/Lifestyle?content_type=article&tags=lifestyle&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Lifestyle') # noqa + ('Local News', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/local&l=50&s=start_time&sd=desc'), # noqa + ('Sports', 'http://www.mdjonline.com/search/?f=rss&t=article&c=sports&l=50&s=start_time&sd=desc'), + ('Obits', 'http://www.mdjonline.com/search/?f=rss&t=article&c=obituaries&l=50&s=start_time&sd=desc'), + ('Editorial & oped', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/mdj_editorials&l=50&s=start_time&sd=desc'), # noqa + ('Lifestyle', 'http://www.mdjonline.com/search/?f=rss&t=article&c=news/lifestyle&l=50&s=start_time&sd=desc'), # noqa + ('Blogs', 'http://www.mdjonline.com/search/?f=rss&t=article&c=opinion/blogs&l=50&s=start_time&sd=desc') # noqa ] - - def print_version(self, url): - split1 = url.split("/") - artid = split1[4] - - # example of link to convert - # Original link: http://mdjonline.com/bookmark/9274197 - # print version: http://mdjonline.com/printer_friendly/9274197 - - print_url = 'http://mdjonline.com/printer_friendly/' + artid - return print_url - - # test with ebook-convert nejm.recipe output_dir --test -vv > - # myrecipe.txt