Update Gamasutra

This commit is contained in:
Kovid Goyal 2015-11-12 09:08:20 +05:30
parent e0d31bafa7
commit 16c216e8ac
2 changed files with 25 additions and 32 deletions

View File

@ -3,8 +3,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gamasutra.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Gamasutra(BasicNewsRecipe):
@ -21,7 +19,6 @@ class Gamasutra(BasicNewsRecipe):
language = 'en'
remove_empty_feeds = True
masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .title{font-size: x-large; font-weight: bold} '
conversion_options = {
'comment' : description
@ -30,27 +27,21 @@ class Gamasutra(BasicNewsRecipe):
, 'language' : language
, 'linearize_tables' : True
}
preprocess_regexps = [
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
,(re.compile(r'</head>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
]
remove_tags_before = dict(name="div",attrs={'class':'page_item'})
remove_tags = [
dict(name=['object','embed','iframe'])
,dict(attrs={'class':'adBox'})
dict(name='meta')
,dict(name='link')
,dict(name='hr')
,dict(name='div', attrs={'class':'hide-phone'})
,dict(name='div', attrs={'class':'nav_links'})
,dict(name='div', attrs={'class':'superfooter'})
,dict(name='span', attrs={'class':'comment_text'})
,dict(name='a', attrs={'type':'button'})
]
remove_tags_before = dict(attrs={'class':'title'})
remove_attributes = ['width','height','name']
feeds = [(u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')]
def print_version(self, url):
return url + '?print=1'
def get_article_url(self, article):
return article.get('guid', None)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)
return url.partition('?')[0] + '?print=1'

View File

@ -3,7 +3,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gamasutra.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gamasutra(BasicNewsRecipe):
@ -20,7 +19,6 @@ class Gamasutra(BasicNewsRecipe):
language = 'en'
remove_empty_feeds = True
masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .newsTitle{font-size: xx-large; font-weight: bold} '
conversion_options = {
'comment' : description
@ -30,16 +28,20 @@ class Gamasutra(BasicNewsRecipe):
, 'linearize_tables' : True
}
remove_tags = [dict(attrs={'class':['relatedNews','adBox']})]
keep_only_tags = [dict(attrs={'class':['newsTitle','newsAuth','newsDate','newsText']})]
remove_attributes = ['width','height']
remove_tags_before = dict(name="div",attrs={'class':'page_item'})
remove_tags = [
dict(name='meta')
,dict(name='link')
,dict(name='hr')
,dict(name='div', attrs={'class':'hide-phone'})
,dict(name='div', attrs={'class':'nav_links'})
,dict(name='div', attrs={'class':'superfooter'})
,dict(name='span', attrs={'class':'comment_text'})
,dict(name='a', attrs={'type':'button'})
]
remove_attributes = ['width','height','name']
feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')]
def get_article_url(self, article):
return article.get('guid', None)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)
def print_version(self, url):
return url.partition('?')[0] + '?print=1'