import re

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """News recipe for Popular Science, built from the popsci.com full-text feeds.

    The class only declares fetch settings (feeds, tags to strip, limits) and
    one HTML clean-up hook, ``preprocess_html``.
    """

    title = 'Popular Science'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Popular Science'
    publisher = 'Popular Science'
    category = 'gadgets,science'
    # Lower this if you want only more current articles; the recipe author
    # chose 7 to go back one week.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'

    # Page furniture removed from every article.
    remove_tags = [
        dict(name='div', attrs={'id': ['toolbar', 'main_supplements']}),
        dict(name='span', attrs={'class': ['comments']}),
        dict(name='div', attrs={'class': ['relatedinfo related-right', 'node_navigation', 'content2']}),
        dict(name='ul', attrs={'class': ['item-list clear-block']}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
        ('Cars', 'http://www.popsci.com/full-feed/cars'),
        ('Science', 'http://www.popsci.com/full-feed/science'),
        ('Technology', 'http://www.popsci.com/full-feed/technology'),
        ('DIY', 'http://www.popsci.com/full-feed/diy'),
    ]

    # Compiled once instead of re-parsing the pattern for every tag.
    _GALLERY_RE = re.compile(r'Gallery:')

    def preprocess_html(self, soup):
        """Get rid of the "Gallery:" links when found.

        Every ``<head>`` or ``<h2>`` element whose serialized markup contains
        the text ``Gallery:`` has its *parent* element removed from the tree.

        :param soup: parsed article document exposing ``findAll`` (the tags it
            returns must provide ``parent`` and ``extract``).
        :return: the same ``soup`` object, modified in place.
        """
        # NOTE(review): 'head' is matched as well as 'h2'. If the <head>
        # (e.g. its <title>) contains "Gallery:", its parent -- presumably the
        # whole <html> element -- is removed. Confirm that this is intended.
        for tag in soup.findAll(['head', 'h2']):
            if self._GALLERY_RE.search(str(tag)):
                tag.parent.extract()
        return soup