This commit is contained in:
Kovid Goyal 2016-10-12 11:06:15 +05:30
commit cf747d617b
6 changed files with 30 additions and 102 deletions

View File

@ -18,17 +18,8 @@ class Engadget(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
remove_tags = [dict(name='div', attrs={'class': ["articleTools clearfix", "relatedContent", "pagination clearfix", "addResources"]}),
dict(name='div', attrs={'id': ["post-socialPromoBlock"]})]
keep_only_tags = [dict(name='div', attrs={'class': ["article"]})]
feeds = [(u'Top Tech Stories', u'http://infoworld.com/homepage/feed'),
(u'Today\'s Tech Headlines', u'http://www.infoworld.com/news/feed')]
def get_article_url(self, article):
    """Return the ``link`` entry of *article*, or ``None`` if it has none."""
    return article.get('link')
feeds = [(u'Top Tech Stories', u'http://www.infoworld.com/index.rss'),
(u'Today\'s Tech Headlines',
u'http://www.infoworld.com/news/index.rss')]

View File

@ -17,37 +17,14 @@ class AdvancedUserRecipe1283666183(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
keep_only_tags = [dict(name='div', attrs={'id': 'mainContent'})]
extra_css = '#copyinfo { font-size: 6 ;} \n #photocredit { font-size: 6 ;} \n .pubinfo { font-size: 6 ;}'
masthead_url = 'http://www.journalgazette.net/img/icons/jgmini.gif'
# cover_url = 'http://www.journalgazette.net/img/icons/jgmini.gif'
encoding = 'cp1252'
auto_cleanup = True
feeds = [(u'Opinion', u'http://journalgazette.net/apps/pbcs.dll/section?Category=EDIT&template=blogrss&mime=xml'),
(u'Local News', u'http://journalgazette.net/apps/pbcs.dll/section?Category=LOCAL&template=blogrss&mime=xml'),
(u'Sports', u'http://journalgazette.net/apps/pbcs.dll/section?Category=SPORTS&template=blogrss&mime=xml'),
(u'Features', u'http://journalgazette.net/apps/pbcs.dll/section?Category=FEAT&template=blogrss&mime=xml'),
(u'Business', u'http://journalgazette.net/apps/pbcs.dll/section?Category=BIZ&template=blogrss&mime=xml'),
(u'Ice Chips', u'http://journalgazette.net/apps/pbcs.dll/section?Category=BLOGS11&template=blogrss&mime=xml '),
(u'Entertainment', u'http://journalgazette.net/apps/pbcs.dll/section?Category=ENT&template=blogrss&mime=xml'),
(u'Food', u'http://journalgazette.net/apps/pbcs.dll/section?Category=FOOD&template=blogrss&mime=xml')
feeds = [(u'Opinion', u'http://www.journalgazette.net/opinion/rss/'),
(u'Local News', u'http://www.journalgazette.net/news/local/rss/'),
(u'Sports', u'http://www.journalgazette.net/sports/rss/'),
(u'Business', u'http://www.journalgazette.net/business/rss/'),
(u'Entertainment',
u'http://www.journalgazette.net/entertainment/rss/'),
(u'Food', u'http://www.journalgazette.net/food/'),
(u'Blogs', u'http://www.journalgazette.net/blog/rss/'),
]
def print_version(self, url):
    """Build the ``template=printart`` URL for an article URL.

    The URL is split on ``/``; part 2 is used as the host and parts 4-6
    are joined to form the ``AID`` value, e.g.::

        http://host/article/20100905/EDIT/309059959
        -> http://host/apps/pbcs.dll/article?AID=/20100905/EDIT/309059959/-1/EDIT01&template=printart

    :param url: article URL as a string.
    :return: the rewritten URL, or *url* unchanged when it has fewer than
        seven ``/``-separated parts (previously this raised IndexError).
    """
    parts = url.split("/")
    # Parts 2 and 4..6 are read below, so at least seven are required.
    if len(parts) < 7:
        return url
    host = parts[2]
    article_id = "/".join(parts[4:7])
    return ('http://' + host + '/apps/pbcs.dll/article?AID=/' +
            article_id + '/-1/EDIT01&template=printart')
def preprocess_html(self, soup):
    """Remove the ``style`` attribute from every element that has one.

    :param soup: parsed document exposing ``findAll``; modified in place.
    :return: the same *soup* object.
    """
    styled_tags = soup.findAll(style=True)
    for tag in styled_tags:
        del tag['style']
    return soup

View File

@ -1,4 +1,3 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -18,31 +17,12 @@ class JP_dk(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
language = 'da'
extra_css = ' body{font-family: Arial,Verdana,Helvetica,Geneva,sans-serif } h1{font-family: Times,Georgia,Verdana,serif } '
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
}
auto_cleanup = True
feeds = [
(u'Tophistorier', u'http://www.jp.dk/rss/topnyheder.jsp'),
(u'Seneste nyt', u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste'),
(u'Seneste nyt',
u'http://jp.dk/index.jsp?service=rssfeed&submode=seneste'),
(u'Indland', u'http://www.jp.dk/rss/indland.jsp'),
(u'Udland', u'http://www.jp.dk/rss/udland.jsp'),
(u'Ny viden', u'http://www.jp.dk/rss/nyviden.jsp'),
(u'Timeout', u'http://www.jp.dk/rss/timeout.jsp'),
(u'Kultur', u'http://www.jp.dk/rss/kultur.jsp'),
(u'Sport', u'http://www.jp.dk/rss/sport.jsp')
]
remove_tags = [
dict(name=['object', 'link']), dict(
name='p', attrs={'class': 'artByline'})
]
def print_version(self, url):
    """Append the ``service=printversion`` query parameter to *url*.

    :param url: article URL as a string.
    :return: *url* with ``service=printversion`` added. ``?`` introduces the
        parameter when the URL has no query string yet; ``&`` is used when
        one is already present, so the result stays a well-formed query
        instead of containing a second ``?``.
    """
    separator = '&' if '?' in url else '?'
    return url + separator + 'service=printversion'

View File

@ -15,9 +15,10 @@ class JerusalemPost(BasicNewsRecipe):
no_stylesheets = True
feeds = [('Front Page', 'http://www.jpost.com/Rss/RssFeedsFrontPage.aspx'),
('Israel News', 'http://www.jpost.com/Rss/RssFeedsIsraelNews.aspx'),
('Middle East News', 'http://www.jpost.com/Rss/RssFeedsMiddleEastNews.aspx'),
('Israel News',
'http://www.jpost.com/Rss/RssFeedsIsraelNews.aspx'),
('Middle East News',
'http://www.jpost.com/Rss/RssFeedsMiddleEastNews.aspx'),
('International News',
'http://www.jpost.com/Rss/RssFeedsInternationalNews.aspx'),
('Editorials', 'http://www.jpost.com/Rss/RssFeedsEditorialsNews.aspx'),
'http://www.jpost.com/Rss/RssFeedsPolitiqueetsocial.aspx'),
]

View File

@ -18,7 +18,6 @@ class AdvancedUserRecipe1274742400(BasicNewsRecipe):
(u'Top Stories', u'http://www.reviewjournal.com/rss.xml'),
(u'News', u'http://www.reviewjournal.com/news/feed'),
(u'Business', u'http://www.reviewjournal.com/business/feed'),
(u'Living', u'http://www.reviewjournal.com/living/feed'),
(u'Opinion', u'http://www.reviewjournal.com/opinion/feed'),
(u'Neon', u'http://www.reviewjournal.com/neon/feed'),
(u'Sports', u'http://www.reviewjournal.com/sports/feed')]

View File

@ -12,33 +12,13 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
masthead_url = 'http://assets.matchbin.com/sites/624/assets/logo.gif'
keep_only_tags = [
dict(name='div', attrs={'id': ['print_content_container']})
]
auto_cleanup = True
feeds = [
('Local News', 'http://mdjonline.com/rss/rss/Local+News?content_type=article&tags=news&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Local+News'), # noqa
('Sports', 'http://mdjonline.com/rss/rss/Sports?content_type=article&tags=sports&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Sports'),
('Obits', 'http://mdjonline.com/rss/rss/Obits?content_type=article&tags=obits&page_name=rss&tag_inclusion=or&offset=0&limit=20&instance=Obits'),
('Editorial & oped', 'http://mdjonline.com/rss/rss/Editorial+and+OPED?content_type=article&tags=oped+editorial&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Editorial+and+OPED'), # noqa
('Lifestyle', 'http://mdjonline.com/rss/rss/Lifestyle?content_type=article&tags=lifestyle&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Lifestyle'), # noqa
('Blogs', 'http://mdjonline.com/rss/rss/Lifestyle?content_type=article&tags=lifestyle&page_name=rss&tag_inclusion=or&offset=0&limit=10&instance=Lifestyle') # noqa
('Local News', 'http://www.mdjonline.com/search/?f=rss&amp;t=article&amp;c=news/local&amp;l=50&amp;s=start_time&amp;sd=desc'), # noqa
('Sports', 'http://www.mdjonline.com/search/?f=rss&amp;t=article&amp;c=sports&amp;l=50&amp;s=start_time&amp;sd=desc'),
('Obits', 'http://www.mdjonline.com/search/?f=rss&amp;t=article&amp;c=obituaries&amp;l=50&amp;s=start_time&amp;sd=desc'),
('Editorial & oped', 'http://www.mdjonline.com/search/?f=rss&amp;t=article&amp;c=opinion/mdj_editorials&amp;l=50&amp;s=start_time&amp;sd=desc'), # noqa
('Lifestyle', 'http://www.mdjonline.com/search/?f=rss&amp;t=article&amp;c=news/lifestyle&amp;l=50&amp;s=start_time&amp;sd=desc'), # noqa
('Blogs', 'http://www.mdjonline.com/search/?f=rss&amp;t=article&amp;c=opinion/blogs&amp;l=50&amp;s=start_time&amp;sd=desc') # noqa
]
def print_version(self, url):
    """Map an article link to its ``printer_friendly`` URL.

    Example of a link to convert::

        original link: http://mdjonline.com/bookmark/9274197
        print version: http://mdjonline.com/printer_friendly/9274197

    :param url: article URL as a string; the article id is taken from the
        fifth ``/``-separated part.
    :return: the printer-friendly URL, or *url* unchanged when it has fewer
        than five parts (previously this raised IndexError).
    """
    parts = url.split("/")
    # Index 4 holds the article id; shorter URLs carry none to convert.
    if len(parts) < 5:
        return url
    return 'http://mdjonline.com/printer_friendly/' + parts[4]
# test with ebook-convert nejm.recipe output_dir --test -vv >
# myrecipe.txt