From 16c216e8ac9d74b6fa39f1a37615470635861958 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 12 Nov 2015 09:08:20 +0530 Subject: [PATCH] Update Gamasutra --- recipes/gamasutra_fa.recipe | 31 +++++++++++-------------------- recipes/gamasutra_news.recipe | 26 ++++++++++++++------------ 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/recipes/gamasutra_fa.recipe b/recipes/gamasutra_fa.recipe index 26a56849f7..06c60edc86 100644 --- a/recipes/gamasutra_fa.recipe +++ b/recipes/gamasutra_fa.recipe @@ -3,8 +3,6 @@ __copyright__ = '2010, Darko Miletic ' ''' gamasutra.com ''' - -import re from calibre.web.feeds.news import BasicNewsRecipe class Gamasutra(BasicNewsRecipe): @@ -21,7 +19,6 @@ class Gamasutra(BasicNewsRecipe): language = 'en' remove_empty_feeds = True masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif' - extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .title{font-size: x-large; font-weight: bold} ' conversion_options = { 'comment' : description @@ -30,27 +27,21 @@ class Gamasutra(BasicNewsRecipe): , 'language' : language , 'linearize_tables' : True } - preprocess_regexps = [ - (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>') - ,(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') - ,(re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '') - ] + + remove_tags_before = dict(name="div",attrs={'class':'page_item'}) remove_tags = [ - dict(name=['object','embed','iframe']) - ,dict(attrs={'class':'adBox'}) + dict(name='meta') + ,dict(name='link') + ,dict(name='hr') + ,dict(name='div', attrs={'class':'hide-phone'}) + ,dict(name='div', attrs={'class':'nav_links'}) + ,dict(name='div', attrs={'class':'superfooter'}) +,dict(name='span', attrs={'class':'comment_text'}) +,dict(name='a', attrs={'type':'button'}) ] - remove_tags_before = dict(attrs={'class':'title'}) remove_attributes = ['width','height','name'] feeds = [(u'Feature Articles', 
u'http://feeds.feedburner.com/GamasutraFeatureArticles')] def print_version(self, url): - return url + '?print=1' - - def get_article_url(self, article): - return article.get('guid', None) - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) + return url.partition('?')[0] + '?print=1' diff --git a/recipes/gamasutra_news.recipe b/recipes/gamasutra_news.recipe index ab7a089e1c..94407f5e2f 100644 --- a/recipes/gamasutra_news.recipe +++ b/recipes/gamasutra_news.recipe @@ -3,7 +3,6 @@ __copyright__ = '2010, Darko Miletic ' ''' gamasutra.com ''' - from calibre.web.feeds.news import BasicNewsRecipe class Gamasutra(BasicNewsRecipe): @@ -20,7 +19,6 @@ class Gamasutra(BasicNewsRecipe): language = 'en' remove_empty_feeds = True masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif' - extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .newsTitle{font-size: xx-large; font-weight: bold} ' conversion_options = { 'comment' : description @@ -30,16 +28,20 @@ class Gamasutra(BasicNewsRecipe): , 'linearize_tables' : True } - remove_tags = [dict(attrs={'class':['relatedNews','adBox']})] - keep_only_tags = [dict(attrs={'class':['newsTitle','newsAuth','newsDate','newsText']})] - remove_attributes = ['width','height'] + remove_tags_before = dict(name="div",attrs={'class':'page_item'}) + remove_tags = [ + dict(name='meta') + ,dict(name='link') + ,dict(name='hr') + ,dict(name='div', attrs={'class':'hide-phone'}) + ,dict(name='div', attrs={'class':'nav_links'}) + ,dict(name='div', attrs={'class':'superfooter'}) +,dict(name='span', attrs={'class':'comment_text'}) +,dict(name='a', attrs={'type':'button'}) + ] + remove_attributes = ['width','height','name'] feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')] - def get_article_url(self, article): - return article.get('guid', None) - - def preprocess_html(self, soup): - for item in 
soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) + def print_version(self, url): + return url.partition('?')[0] + '?print=1'