diff --git a/resources/recipes/editor_and_publisher.recipe b/resources/recipes/editor_and_publisher.recipe index c8f287a0c7..0ec5c59d74 100644 --- a/resources/recipes/editor_and_publisher.recipe +++ b/resources/recipes/editor_and_publisher.recipe @@ -1,14 +1,29 @@ -import re +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2010 elsuave' + from calibre.web.feeds.news import BasicNewsRecipe class EandP(BasicNewsRecipe): title = u'Editor and Publisher' - __author__ = u'Xanthan Gum' + __author__ = u'elsuave (modified from Xanthan Gum)' description = 'News about newspapers and journalism.' + publisher = 'Editor and Publisher' + category = 'news, journalism, industry' language = 'en' - no_stylesheets = True + max_articles_per_feed = 25 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf8' + cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif' + remove_javascript = True - oldest_article = 7 - max_articles_per_feed = 100 + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' # Font formatting code borrowed from kwetal @@ -18,17 +33,21 @@ class EandP(BasicNewsRecipe): h2{font-size: large;} ''' - # Delete everything before the article + # Keep only div:itemmgap - remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'}) + keep_only_tags = [ + dict(name='div', attrs={'class':'itemmgap'}) + ] - # Delete everything after the article + # Remove commenting/social media links - preprocess_regexps = [(re.compile(r'.*', re.DOTALL|re.IGNORECASE), - lambda match: ''),] + remove_tags_after = [dict(name='div', attrs={'class':'clear'})] + + + feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'), + (u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'), + (u'Ad/Circ News', 
u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'), + (u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'), + (u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'), + (u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')] - feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'), - (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'), - (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'), - (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'), - (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]