Fix Twitch Films recipe

This commit is contained in:
Kovid Goyal 2012-09-29 14:57:28 +05:30
parent f1ba6ad031
commit 4cafd33a52

View File

@@ -13,6 +13,7 @@ class Twitchfilm(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
auto_cleanup = True
encoding = 'utf-8' encoding = 'utf-8'
publisher = 'Twitch' publisher = 'Twitch'
masthead_url = 'http://twitchfilm.com/img/logo.png' masthead_url = 'http://twitchfilm.com/img/logo.png'
@@ -26,18 +27,18 @@ class Twitchfilm(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags=[dict(attrs={'class':'asset-header'})] #keep_only_tags=[dict(attrs={'class':'asset-header'})]
remove_tags_after=dict(attrs={'class':'asset-body'}) #remove_tags_after=dict(attrs={'class':'asset-body'})
remove_tags = [ dict(name='div', attrs={'class':['social','categories']}) #remove_tags = [ dict(name='div', attrs={'class':['social','categories']})
, dict(attrs={'id':'main-asset'}) #, dict(attrs={'id':'main-asset'})
, dict(name=['meta','link','iframe','embed','object']) #, dict(name=['meta','link','iframe','embed','object'])
] #]
feeds = [(u'News', u'http://feeds.twitchfilm.net/TwitchEverything')] feeds = [(u'News', u'http://feeds.twitchfilm.net/TwitchEverything')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
for item in soup.findAll('a'): for item in soup.findAll('a'):
limg = item.find('img') limg = item.find('img')
if item.string is not None: if item.string is not None: