mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:Various improved recipes
This commit is contained in:
parent
2ce5dec5ee
commit
05bc40b53f
@ -6,10 +6,10 @@ class HBR(BasicNewsRecipe):
|
|||||||
title = 'Harvard Business Review'
|
title = 'Harvard Business Review'
|
||||||
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
description = 'To subscribe go to http://hbr.harvardbusiness.org'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal and Sujata Raman'
|
||||||
timefmt = ' [%B %Y]'
|
timefmt = ' [%B %Y]'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
|
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
|
||||||
INDEX = 'http://hbr.harvardbusiness.org/current'
|
INDEX = 'http://hbr.harvardbusiness.org/current'
|
||||||
|
|
||||||
@ -20,14 +20,14 @@ class HBR(BasicNewsRecipe):
|
|||||||
'contentRight', 'summaryLink']),
|
'contentRight', 'summaryLink']),
|
||||||
dict(name='form'),
|
dict(name='form'),
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
|
||||||
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
|
||||||
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
|
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
|
||||||
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
|
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
|
||||||
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
|
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
|
||||||
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
|
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
@ -100,10 +100,10 @@ class HBR(BasicNewsRecipe):
|
|||||||
index = 'http://hbr.harvardbusiness.org/current'
|
index = 'http://hbr.harvardbusiness.org/current'
|
||||||
soup = self.index_to_soup(index)
|
soup = self.index_to_soup(index)
|
||||||
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
|
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
|
||||||
|
|
||||||
if link_item:
|
if link_item:
|
||||||
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
|
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
|
||||||
|
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class KellogInsight(BasicNewsRecipe):
|
class KellogInsight(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Kellog Insight'
|
title = 'Kellog Insight'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal and Sujata Raman'
|
||||||
description = 'Articles from the Kellog School of Management'
|
description = 'Articles from the Kellog School of Management'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
oldest_article = 60
|
oldest_article = 60
|
||||||
remove_tags_before = {'name':'h1'}
|
|
||||||
remove_tags_after = {'class':'col-two-text'}
|
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]
|
||||||
|
|
||||||
|
remove_tags = [dict(name='div', attrs={'class':'col-three'})]
|
||||||
|
|
||||||
feeds = [('Articles',
|
extra_css = '''
|
||||||
'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
|
h1{font-family:arial; font-size:medium; color:#333333;}
|
||||||
|
.col-one{font-family:arial; font-size:xx-small;}
|
||||||
|
.col-two{font-family:arial; font-size:x-small; }
|
||||||
|
h2{font-family:arial; font-size:small; color:#666666;}
|
||||||
|
h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
|
||||||
|
h4{color:#660000;font-family:arial; font-size:x-small;}
|
||||||
|
.col-two-text{font-family:arial; font-size:x-small; color:#333333;}
|
||||||
|
'''
|
||||||
|
|
||||||
|
feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
# Get only article not blog links
|
# Get only article not blog links
|
||||||
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
|
|||||||
return link
|
return link
|
||||||
self.log('Skipping non-article', link)
|
self.log('Skipping non-article', link)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Rename the element following each ``<span>`` to ``<h4>``.

    For every ``span`` found in ``soup``, the span's next sibling has its
    ``name`` set to ``'h4'``. Spans without a next sibling are skipped.

    :param soup: parsed article document, modified in place
    :return: the same ``soup`` object
    """
    for span in soup.findAll(name=['span']):
        sibling = span.nextSibling
        # A span that is the last child of its parent has no next sibling;
        # assigning to ``None.name`` would raise AttributeError and abort
        # processing of the whole article.
        if sibling is not None:
            sibling.name = 'h4'

    return soup
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class Sciencenews(BasicNewsRecipe):
|
class Sciencenews(BasicNewsRecipe):
|
||||||
title = u'ScienceNews'
|
title = u'ScienceNews'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = u'Darko Miletic and Sujata Raman'
|
||||||
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
|
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
language = 'en'
|
language = 'en'
|
||||||
@ -17,13 +17,45 @@ class Sciencenews(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%A, %d %B, %Y]'
|
timefmt = ' [%A, %d %B, %Y]'
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
|
||||||
|
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
|
||||||
|
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
|
||||||
|
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
|
||||||
|
.exclusive{color:#FF0000 ;}
|
||||||
|
.anonymous{color:#14487E ;}
|
||||||
|
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
|
||||||
|
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
|
||||||
|
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
|
||||||
|
'''
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
|
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
|
||||||
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
|
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ul', attrs={'id':'content_functions_bottom'})
|
dict(name='ul', attrs={'id':'content_functions_bottom'})
|
||||||
,dict(name='div', attrs={'id':'content_functions_top'})
|
,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
|
||||||
|
,dict(name='img', attrs={'class':'icon'})
|
||||||
|
,dict(name='div', attrs={'class': 'embiggen'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
|
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
|
||||||
|
|
||||||
|
def get_cover_url(self):
    """Return the cover image URL for the current issue, or ``None``.

    Loads the Science News home page through ``self.index_to_soup`` and
    looks for the first ``<img>`` whose ``alt`` is ``"issue"``. When one is
    found, its ``src`` is appended to the site root and suffixed with
    ``.jpg``; otherwise ``None`` is returned.
    """
    index = 'http://www.sciencenews.org/view/home'
    soup = self.index_to_soup(index)
    # src=True limits the match to images that actually carry a src
    # attribute, so the subscript below cannot raise KeyError.
    link_item = soup.find(name='img', alt="issue", src=True)
    if not link_item:
        return None

    return 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Turn every ``<span>`` in ``soup`` into a ``<div>``.

    :param soup: parsed article document, modified in place
    :return: the same ``soup`` object
    """
    spans = soup.findAll(name=['span'])
    for span in spans:
        span.name = 'div'

    return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user