Fix Editor and Publisher

This commit is contained in:
Kovid Goyal 2010-07-03 20:00:32 -06:00
parent 4c25365ea4
commit 44957d4562

View File

@ -1,14 +1,29 @@
import re
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010 elsuave'
from calibre.web.feeds.news import BasicNewsRecipe
class EandP(BasicNewsRecipe):
title = u'Editor and Publisher'
__author__ = u'Xanthan Gum'
__author__ = u'elsuave (modified from Xanthan Gum)'
description = 'News about newspapers and journalism.'
publisher = 'Editor and Publisher'
category = 'news, journalism, industry'
language = 'en'
max_articles_per_feed = 25
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
remove_javascript = True
oldest_article = 7
max_articles_per_feed = 100
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
# Font formatting code borrowed from kwetal
@ -18,17 +33,21 @@ class EandP(BasicNewsRecipe):
h2{font-size: large;}
'''
# Delete everything before the article
# Keep only div:itemmgap
remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
keep_only_tags = [
dict(name='div', attrs={'class':'itemmgap'})
]
# Delete everything after the article
# Remove commenting/social media lins
preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: '</body>'),]
remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
(u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
(u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
(u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
(u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
(u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
(u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
(u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
(u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
(u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]