From 8c2b672e6c086db3003f3f1e7ce2283d7c57da1d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Sep 2010 12:30:19 -0600 Subject: [PATCH] ... --- resources/recipes/boortz.recipe | 15 +++++++-------- resources/recipes/popscience.recipe | 26 +++++++++++++------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/resources/recipes/boortz.recipe b/resources/recipes/boortz.recipe index 0b52e0b9ca..8fb8041411 100644 --- a/resources/recipes/boortz.recipe +++ b/resources/recipes/boortz.recipe @@ -1,5 +1,4 @@ from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, re class AdvancedUserRecipe1282101454(BasicNewsRecipe): title = 'Nealz Nuze' language = 'en' @@ -12,11 +11,11 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): linearize_tables = True no_stylesheets = True remove_javascript = True - + masthead_url = 'http://boortz.com/images/nuze_logo.gif' keep_only_tags = [ dict(name='td', attrs={'id':['contentWellCell']}) - + ] remove_tags = [ dict(name='a', attrs={'class':['blogPermalink']}), @@ -26,13 +25,13 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): remove_tags_after = [dict(name='div', attrs={'class':'blogEntryBody'}),] feeds = [ ('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml') - + ] - - - - + + + + diff --git a/resources/recipes/popscience.recipe b/resources/recipes/popscience.recipe index 2bef7e4807..1527a1bb71 100644 --- a/resources/recipes/popscience.recipe +++ b/resources/recipes/popscience.recipe @@ -1,5 +1,5 @@ from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, re +from calibre.ebooks.BeautifulSoup import re class AdvancedUserRecipe1282101454(BasicNewsRecipe): title = 'Popular Science' @@ -13,35 +13,35 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): no_stylesheets = True remove_javascript = True use_embedded_content = True - + masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg' - - + + feeds = [ - + ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'), ('Cars', 'http://www.popsci.com/full-feed/cars'), ('Science', 'http://www.popsci.com/full-feed/science'), ('Technology', 'http://www.popsci.com/full-feed/technology'), ('DIY', 'http://www.popsci.com/full-feed/diy'), - + ] - - #The following will get read of the Gallery: links when found - + + #The following will get read of the Gallery: links when found + def preprocess_html(self, soup) : print 'SOUP IS: ', soup weblinks = soup.findAll(['head','h2']) if weblinks is not None: for link in weblinks: if re.search('(Gallery)(:)',str(link)): - + link.parent.extract() return soup - #----------------------------------------------------------------- - - + #----------------------------------------------------------------- + +