__license__ = 'GPL v3'
__copyright__ = '2009-2012, Darko Miletic '
'''
www.variety.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class Variety(BasicNewsRecipe):
    """Calibre news recipe that downloads the Variety headlines feed.

    The class attributes below are recipe options read by
    ``BasicNewsRecipe``; the methods are hooks that adjust article URLs and
    clean up the downloaded markup.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Variety'
    __author__ = 'Darko Miletic'
    description = 'Breaking entertainment movie news, movie reviews, entertainment industry events, news and reviews from Cannes, Oscars, and Hollywood awards. Featuring box office charts, archives and more.'  # noqa
    publisher = 'Red Business Information'
    category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'  # noqa
    language = 'en'
    masthead_url = 'http://images1.variety.com/graphics/variety/Variety_logo_green_tm.gif'

    # --- Download options (semantics defined by BasicNewsRecipe) ---------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'

    # Stylesheet applied to the generated book. Adjacent literals are
    # concatenated by the parser, so the value is a single CSS string.
    extra_css = (
        ' body{font-family: Arial,Helvetica,sans-serif; font-size: 1.275em}'
        ' .date{font-size: small; border: 1px dotted rgb(204, 204, 204);'
        ' font-style: italic; color: rgb(102, 102, 102); margin: 5px 0px; padding: 0.5em;}'
        ' .author{margin: 5px 0px 5px 20px; padding: 0.5em;'
        ' background: none repeat scroll 0% 0% rgb(247, 247, 247);}'
        ' .art h2{color: rgb(153, 0, 0); font-size: 1.275em; font-weight: bold;}'
        ' img{margin-bottom: 1em} '
    )

    # Metadata forwarded to the conversion pipeline; reuses the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Markup filtering -------------------------------------------------
    remove_tags = [dict(name=['object', 'link', 'map'])]
    remove_attributes = ['lang', 'vspace', 'hspace', 'xmlns:ms', 'xmlns:dt']
    keep_only_tags = [dict(name='div', attrs={'class': 'art control'})]

    feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines')]

    def print_version(self, url):
        """Return the printer-friendly variant of an article URL.

        Everything from the last ``.html`` onward is dropped and
        ``?printerfriendly=true`` is appended.

        :param url: article URL as a string.
        :return: the printer-friendly URL string.
        """
        head, sep, _ = url.rpartition('.html')
        # rpartition() puts the whole input in the *last* slot when the
        # separator is missing, so fall back to the full URL instead of
        # returning a bare '?printerfriendly=true'.
        base = head if sep else url
        return base + '?printerfriendly=true'

    def preprocess_raw_html(self, raw, url):
        """Return the raw page markup before it is parsed.

        :param raw: the downloaded page source.
        :param url: the URL the page came from (unused).
        :return: the markup to parse.
        """
        # As written, both literals are empty: find('') is 0 and the slice
        # is the whole input, so `raw` is returned unchanged.
        # NOTE(review): the empty literals look like markup that was lost
        # from this file - confirm the intended prefix and search marker.
        return '' + raw[raw.find(''):]

    def get_article_url(self, article):
        """Return the article URL with its trailing query string removed.

        :param article: feed entry passed through to
            ``BasicNewsRecipe.get_article_url``.
        :return: the URL up to (not including) the last ``?``; the URL
            unchanged when it has no ``?``; or the falsy value returned by
            the base class when no URL was found.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            # Nothing to strip; pass the base-class result straight through
            # rather than failing on a missing URL.
            return url
        head, sep, _ = url.rpartition('?')
        # Without a '?' rpartition() yields an empty head, which would throw
        # the whole URL away - keep the URL as-is in that case.
        return head if sep else url

    def preprocess_html(self, soup):
        """Remove hyperlinks and make sure every image has an ``alt`` text.

        Each ``<a>`` is handled in one of three ways:

        * it has a single string child: replaced by that string;
        * otherwise, it contains an ``<img>``: turned into an attribute-less
          ``<div>`` so the image is kept;
        * otherwise: replaced by its text content.

        :param soup: parsed article tree; modified in place.
        :return: the same ``soup`` object.
        """
        for anchor in soup.findAll('a'):
            if anchor.string is not None:
                anchor.replaceWith(anchor.string)
            elif anchor.find('img'):
                # Keep the wrapped image but stop the tag from being a link.
                anchor.name = 'div'
                anchor.attrs = []
            else:
                anchor.replaceWith(self.tag_to_string(anchor))

        # alt=False selects <img> tags that have no alt attribute.
        for image in soup.findAll('img', alt=False):
            image['alt'] = 'image'
        return soup