diff --git a/resources/images/news/bighollywood.png b/resources/images/news/bighollywood.png new file mode 100644 index 0000000000..9fc42b2fd4 Binary files /dev/null and b/resources/images/news/bighollywood.png differ diff --git a/resources/recipes/bighollywood.recipe b/resources/recipes/bighollywood.recipe new file mode 100644 index 0000000000..7213bd3257 --- /dev/null +++ b/resources/recipes/bighollywood.recipe @@ -0,0 +1,63 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +bighollywood.breitbart.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class BigHollywood(BasicNewsRecipe): + title = 'Big Hollywood' + __author__ = 'Darko Miletic' + description = 'News and articles from the media world' + publisher = 'Big Hollywood' + category = 'news, media, art, literature, movies, politics, USA, Hollywood' + oldest_article = 7 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + publication_type = 'blog' + extra_css = """ + body{font-family: Arial,sans-serif } + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags=[dict(attrs={'class':'postcontent'})] + remove_tags = [ + dict(name=['meta','link','link','iframe','embed','object']) + ,dict(name='p', attrs={'class':['post_meta_links','postfooter']}) + ] + remove_attributes=['original','onclick'] + + feeds = [(u'Articles', u'http://bighollywood.breitbart.com/feed/')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('a'): + limg = item.find('img') + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + if limg: + if limg['src'].endswith('BlogPrintButton.png'): + limg.extract() + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + return soup