mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Popular Science by Tony Stegall
This commit is contained in:
parent
25bacf9a97
commit
1a5b92d6d9
BIN
resources/images/news/popscience.png
Normal file
BIN
resources/images/news/popscience.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 737 B |
59
resources/recipes/popscience.recipe
Normal file
59
resources/recipes/popscience.recipe
Normal file
@ -0,0 +1,59 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for Popular Science (popsci.com).

    Pulls articles from the site's full-text feeds (gadgets, cars, science,
    technology, DIY), strips page furniture listed in ``remove_tags`` and
    drops "Gallery:" entries in ``preprocess_html``.
    """

    title = 'Popular Science'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Popular Science'
    publisher = 'Popular Science'
    category = 'gadgets,science'

    # Lower this value if you want only more current articles; the recipe
    # author's preference is to go back a week.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'

    # Elements removed from every article page: toolbars, supplements,
    # comment counters, related-info boxes, navigation and item lists.
    remove_tags = [
        dict(name='div', attrs={'id':['toolbar','main_supplements']}),
        dict(name='span', attrs={'class':['comments']}),
        dict(name='div', attrs={'class':['relatedinfo related-right','node_navigation','content2']}),
        dict(name='ul', attrs={'class':['item-list clear-block']}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
        ('Cars', 'http://www.popsci.com/full-feed/cars'),
        ('Science', 'http://www.popsci.com/full-feed/science'),
        ('Technology', 'http://www.popsci.com/full-feed/technology'),
        ('DIY', 'http://www.popsci.com/full-feed/diy'),
    ]

    # The following will get rid of the Gallery: links when found
    def preprocess_html(self, soup):
        """Remove the containers of "Gallery:" entries from an article.

        Every ``<head>`` and ``<h2>`` element whose markup contains the text
        ``Gallery:`` has its *parent* element extracted from the tree.

        NOTE(review): when the match is inside ``<head>`` (e.g. the page
        title) the extracted parent is presumably the whole ``<html>``
        element, which empties the article -- confirm this is intended.

        :param soup: parsed article document (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # findAll returns a (possibly empty) list, so no None check is needed.
        for link in soup.findAll(['head', 'h2']):
            if re.search(r'Gallery:', str(link)):
                link.parent.extract()
        return soup
|
||||
#-----------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user