# Reconstructed from the diff that removed recipes/telegraph_uk.recipe.
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic '
'''
telegraph.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe
import json


def classes(classes):
    """Return a tag matcher that accepts any of the given CSS classes.

    ``classes`` is a space separated string of class names. The result is a
    dict with an ``attrs`` entry whose ``class`` predicate is truthy when the
    element's class attribute shares at least one name with ``classes``.
    It is used below to build ``keep_only_tags`` and ``remove_tags``.
    """
    q = frozenset(classes.split(' '))
    return dict(
        attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
    )


def absolutize(url):
    """Turn a site-relative URL into an absolute telegraph.co.uk URL.

    URLs that are already absolute are returned unchanged.
    """
    # A protocol-relative URL ('//host/path') already names its host; it only
    # lacks a scheme, so it must not get the site origin glued in front.
    if url.startswith('//'):
        return 'http:' + url
    if url.startswith('/'):
        return 'http://www.telegraph.co.uk' + url
    return url


class TelegraphUK(BasicNewsRecipe):
    """calibre recipe that downloads The Telegraph (UK) from its RSS feeds.

    The recipe logs in with the user's subscription credentials, keeps only
    the headline/author/body parts of each article and rewrites lazily
    loaded images into plain ``<img>`` tags.
    """

    title = 'The Telegraph (UK)'
    __author__ = 'A10KiloHam, based on work by Darko Miletic and Sujata Raman'
    description = 'News from United Kingdom'
    oldest_article = 2
    category = 'news, politics, UK'
    publisher = 'Telegraph Media Group ltd.'
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'en_GB'
    encoding = 'utf-8'
    needs_subscription = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    use_embedded_content = False
    INDEX = 'https://www.telegraph.co.uk/'
    LOGIN = 'https://secure.telegraph.co.uk/customer/secure/login/?redirectTo=https%3A%2F%2Fwww.telegraph.co.uk%2F'
    PREFIX = u'https://www.telegraph.co.uk'

    # One entry per section. The original list ended with a second 'Money'
    # and a second 'Opinion' entry whose URLs were swapped (Money pointed at
    # opinion/rss.xml and vice versa); those mislabelled duplicates are gone.
    feeds = [
        (u'News', u'http://www.telegraph.co.uk/news/rss.xml'),
        (u'Politics', u'https://www.telegraph.co.uk/politics/rss.xml'),
        (u'Business', u'http://www.telegraph.co.uk/business/rss.xml'),
        (u'Money', u'http://www.telegraph.co.uk/money/rss.xml'),
        (u'Technology', u'http://www.telegraph.co.uk/technology/rss.xml'),
        (u'Science', u'http://www.telegraph.co.uk/science/rss.xml'),
        (u'Opinion', u'http://www.telegraph.co.uk/opinion/rss.xml'),
        (u'Travel', u'http://www.telegraph.co.uk/travel/rss.xml'),
        (u'Culture', u'http://www.telegraph.co.uk/culture/rss.xml'),
        (u'Lifestyle', u'http://www.telegraph.co.uk/lifestyle/rss.xml'),
        (u'Fashion', u'http://www.telegraph.co.uk/fashion/rss.xml'),
    ]

    keep_only_tags = [
        classes(
            'lead-asset-image-container headline__heading footer-author article-author__meta'
        ),
        dict(itemprop='articleBody'),
    ]

    remove_tags = [
        dict(name=['link', 'meta', 'style']),
        classes('videoPlayer'),
    ]
    remove_attributes = 'width height'.split()

    def get_cover_url(self):
        """Return the URL of today's front page image, or None.

        The dated image on img.kiosko.net is tried first. If opening it
        fails, the kiosko.net page for the paper is scanned for an image
        whose ``src`` ends in ``750.jpg``. None is returned when neither
        source yields a cover.
        """
        from datetime import date
        # Read the date once so the year/month/day parts of the path cannot
        # come from two different days around midnight.
        today = date.today()
        cover = (
            'http://img.kiosko.net/' + str(today.year) +
            today.strftime('/%m/%d') + '/uk/daily_telegraph.750.jpg'
        )
        # Use the base class browser: fetching the cover needs no login.
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.
            index = 'http://en.kiosko.net/uk/np/daily_telegraph.html'
            soup = self.index_to_soup(index)
            for image in soup.findAll('img', src=True):
                if image['src'].endswith('750.jpg'):
                    return image['src']
            self.log("\nCover unavailable")
            cover = None
        return cover

    def get_browser(self, *a, **kw):
        """Return a browser with a desktop user agent, logged in if possible.

        Positional and keyword arguments are forwarded to the base class;
        ``user_agent`` is always overridden. The login form is submitted only
        when both a username and a password are configured.
        """
        USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
        # Forward the caller's arguments instead of silently dropping them.
        kw['user_agent'] = USER_AGENT
        br = BasicNewsRecipe.get_browser(self, *a, **kw)
        self.log('Forming login request...')
        if self.username is not None and self.password is not None:
            self.log('Starting login process...')
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['email'] = self.username
            br['password'] = self.password
            self.log('Sending login request...')
            br.submit()
        return br

    def get_article_url(self, article):
        """Return the article link, or None for missing links and galleries."""
        url = article.get('link', None)
        # A feed entry without a link used to raise TypeError on the ``in``
        # tests below.
        if not url:
            return None
        gallery_markers = ('picture-galleries', 'pictures', 'picturegalleries')
        if any(marker in url for marker in gallery_markers):
            return None
        return url

    def preprocess_html(self, soup):
        """Rewrite lazily loaded image placeholders into ``<img>`` tags.

        Elements carrying a ``data-frz-src-array`` attribute get the first
        listed source wider than 700 as ``src``. ``LazyImage`` divs get the
        first URL of their ``data-srcset``. Placeholders whose data is
        missing or malformed are left untouched instead of aborting the
        article.
        """
        for img in soup.findAll(attrs={'data-frz-src-array': True}):
            try:
                # The attribute holds a single-quoted, JSON-like list.
                sources = json.loads(
                    img['data-frz-src-array'].replace("'", '"')
                )
            except ValueError:
                continue
            img['style'] = ''
            img.name = 'img'
            for item in sources:
                if int(item.get('width', 0)) > 700:
                    img['src'] = absolutize(item['src'])
                    break
        for img in soup.findAll('div', attrs={'data-js': 'LazyImage'}):
            candidates = (img.get('data-srcset') or '').split()
            if not candidates:
                # No usable source: converting the div would only produce an
                # <img> without a src.
                continue
            img['style'] = ''
            img.name = 'img'
            img['src'] = candidates[0]
        return soup