Fix #850382 (Updated recipe for twitch films)

This commit is contained in:
Kovid Goyal 2011-09-14 15:14:06 -06:00
parent ea325148ea
commit 31534f3dd2
2 changed files with 34 additions and 20 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 B

View File

@ -1,12 +1,9 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
twitchfilm.net/site/ twitchfilm.net/news/
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Twitchfilm(BasicNewsRecipe): class Twitchfilm(BasicNewsRecipe):
title = 'Twitch Films' title = 'Twitch Films'
@ -15,29 +12,46 @@ class Twitchfilm(BasicNewsRecipe):
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = True use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
publisher = 'Twitch' publisher = 'Twitch'
masthead_url = 'http://twitchfilm.com/img/logo.png'
category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk' category = 'twitch, twitchfilm, movie news, movie reviews, cult cinema, independent cinema, anime, foreign cinema, geek talk'
language = 'en' language = 'en'
lang = 'en-US'
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher': publisher
, 'language' : lang , 'language' : language
, 'pretty_print' : True
} }
remove_tags = [dict(name='div', attrs={'class':'feedflare'})] keep_only_tags=[dict(attrs={'class':'asset-header'})]
remove_tags_after=dict(attrs={'class':'asset-body'})
remove_tags = [ dict(name='div', attrs={'class':['social','categories']})
, dict(attrs={'id':'main-asset'})
, dict(name=['meta','link','iframe','embed','object'])
]
feeds = [(u'News', u'http://feedproxy.google.com/TwitchEverything')] feeds = [(u'News', u'http://feeds.twitchfilm.net/TwitchEverything')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = Tag(soup,'meta',[('http-equiv','Content-Type'),('context','text/html; charset=utf-8')]) for item in soup.findAll(style=True):
soup.head.insert(0,mtag) del item['style']
soup.html['lang'] = self.lang for item in soup.findAll('a'):
return self.adeify_images(soup) limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup