From 984374825fd15ab866defb812c2a87ddc4b1e36d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 23 Sep 2012 17:10:08 +0530 Subject: [PATCH] Update Mac World and Maximum PC --- recipes/mac_world.recipe | 24 ++++++++++++----------- recipes/maximum_pc.recipe | 41 +++++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/recipes/mac_world.recipe b/recipes/mac_world.recipe index 486aa9cb87..5abbffb6bb 100644 --- a/recipes/mac_world.recipe +++ b/recipes/mac_world.recipe @@ -34,20 +34,21 @@ class macWorld(BasicNewsRecipe): remove_javascript = True no_stylesheets = True + auto_cleanup = True - keep_only_tags = [ - dict(name='div', attrs={'id':'content'}) - ] + #keep_only_tags = [ + #dict(name='div', attrs={'id':'content'}) + #] - remove_tags = [ - {'class':['toolBar','mac_tags','toolBar btmTools','textAds']}, - dict(name='p', attrs={'class':'breadcrumbs'}), - dict(id=['breadcrumb','sidebar','comments','topContentWrapper', - 'rightColumn', 'aboveFootPromo', 'storyCarousel']), - {'class':lambda x: x and ('tools' in x or 'toolBar' - in x)} + #remove_tags = [ + #{'class':['toolBar','mac_tags','toolBar btmTools','textAds']}, + #dict(name='p', attrs={'class':'breadcrumbs'}), + #dict(id=['breadcrumb','sidebar','comments','topContentWrapper', + #'rightColumn', 'aboveFootPromo', 'storyCarousel']), + #{'class':lambda x: x and ('tools' in x or 'toolBar' + #in x)} - ] + #] feeds = [ (u'MacWorld Headlines', u'http://rss.macworld.com/macworld/news'), @@ -82,3 +83,4 @@ class macWorld(BasicNewsRecipe): .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;} img {align:left;} ''' + diff --git a/recipes/maximum_pc.recipe b/recipes/maximum_pc.recipe index 3e4d8a58d9..c6e8099fcf 100644 --- a/recipes/maximum_pc.recipe +++ b/recipes/maximum_pc.recipe @@ -1,4 +1,3 @@ -from calibre.ptempfile import PersistentTemporaryFile from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1276930924(BasicNewsRecipe): @@ -14,30 +13,30 @@ class AdvancedUserRecipe1276930924(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True language = 'en' - temp_files = [] - articles_are_obfuscated = True - feeds = [(u'News', u'http://www.maximumpc.com/articles/4/feed'), + auto_cleanup = True + feeds = [#(u'News', u'http://www.maximumpc.com/articles/all/feed'), + (u'News', u'http://www.maximumpc.com/articles/4/feed'), (u'Reviews', u'http://www.maximumpc.com/articles/40/feed'), (u'Editors Blog', u'http://www.maximumpc.com/articles/6/feed'), (u'How-to', u'http://www.maximumpc.com/articles/32/feed'), (u'Features', u'http://www.maximumpc.com/articles/31/feed'), (u'From the Magazine', u'http://www.maximumpc.com/articles/72/feed') ] - keep_only_tags = [ - dict(name='div', attrs={'class':['print-title','article_body']}), - ] - remove_tags = [ - dict(name='div', attrs={'class':'comments-tags-actions'}), - ] - remove_tags_before = dict(name='div', attrs={'class':'print-title'}) - remove_tags_after = dict(name='div', attrs={'class':'meta-content'}) + #keep_only_tags = [ + #dict(name='div', attrs={'class':['print-title','article_body']}), + #] + #remove_tags = [ + #dict(name='div', attrs={'class':'comments-tags-actions'}), + #] + #remove_tags_before = dict(name='div', attrs={'class':'print-title'}) + #remove_tags_after = dict(name='div', attrs={'class':'meta-content'}) - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url) - response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0) - html = response.read() - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name + #def get_obfuscated_article(self, url): + #br = self.get_browser() + #br.open(url) + #response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0) + #html = response.read() + #self.temp_files.append(PersistentTemporaryFile('_fa.html')) + #self.temp_files[-1].write(html) + #self.temp_files[-1].close() + #return self.temp_files[-1].name