diff --git a/recipes/lifehacker.recipe b/recipes/lifehacker.recipe index f7beaa1c7c..861eb1c208 100644 --- a/recipes/lifehacker.recipe +++ b/recipes/lifehacker.recipe @@ -1,87 +1,35 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2017, Kovid Goyal from calibre.web.feeds.news import BasicNewsRecipe -from datetime import datetime -from calibre.ebooks.BeautifulSoup import Tag -from calibre.utils.magick import Image, PixelWand + + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict( + attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) class LifeHacker(BasicNewsRecipe): - THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here - LANGUAGE = 'en' - LANGHTM = 'en' - language = 'en' - ENCODING = 'utf' - ENCHTM = 'utf-8' - requires_version = (0, 7, 47) - news = True - title = u'LifeHacker' - __author__ = 'Euler Alves' + __author__ = 'Kovid Goyal' + language = 'en' description = u'Tips, tricks, and downloads for getting things done.' publisher = u'lifehacker.com' - author = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani' - category = 'news, rss' - oldest_article = 4 max_articles_per_feed = 20 summary_length = 1000 remove_javascript = True no_stylesheets = True - use_embedded_content = True + use_embedded_content = False remove_empty_feeds = True - timefmt = ' [%d %b %Y (%a)]' - - hoje = datetime.now() - pubdate = hoje.strftime('%a, %d %b') - cover_url = 'http://api.thumbalizr.com/?api_key=' + THUMBALIZR_API + \ - '&url=http://lifehacker.com&width=600&quality=90' - cover_margins = (0, 0, 'white') - masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png' + keep_only_tags = [ + classes('headline author post-content'), + ] remove_tags = [ - {'class': 'feedflare'}, - dict(name='div', - attrs={'class': [ - 'ad_container', 'ad_300x250', 'ad_interstitial', 'share-wrap', 'ad_300x600', 'ad_perma-footer-adsense', 'ad_perma-panorama', 'ad panorama', 'ad_container' # noqa - ]}), dict(name='div', - attrs={'id': [ - 'agegate_container', 'agegate_container_rejected', 'sharemenu-wrap' - ]}) + classes('commerce-inset'), ] feeds = [(u'Articles', u'https://lifehacker.com/rss')] - - conversion_options = { - 'title': title, 'comments': description, 'publisher': publisher, 'tags': category, 'language': LANGUAGE, 'linearize_tables': True - } - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - if not soup.find(attrs={'http-equiv': 'Content-Language'}): - meta0 = Tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.LANGHTM)]) - soup.head.insert(0, meta0) - if not soup.find(attrs={'http-equiv': 'Content-Type'}): - meta1 = Tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=" + self.ENCHTM)]) - soup.head.insert(0, meta1) - return soup - - def postprocess_html(self, soup, first): - # process all the images. assumes that the new html has the correct - # path - for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa - iurl = tag['src'] - img = Image() - img.open(iurl) - width, height = img.size - print 'img is: ', iurl, 'width is: ', width, 'height is: ', height - if img < 0: - raise RuntimeError('Out of memory') - pw = PixelWand() - if(width > height and width > 590): - print 'Rotate image' - img.rotate(pw, -90) - img.save(iurl) - return soup