diff --git a/recipes/hackernews_with_comments.recipe b/recipes/hackernews_with_comments.recipe deleted file mode 100644 index 951a65fd78..0000000000 --- a/recipes/hackernews_with_comments.recipe +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -''' -Hacker News (with comments) -''' -from calibre.ptempfile import PersistentTemporaryFile -from calibre.web.feeds.news import BasicNewsRecipe - -try: - from urllib.parse import urlparse -except ImportError: - from urlparse import urlparse -import re - - -class HNWithComments(BasicNewsRecipe): - title = 'HN With actual comments' - __author__ = 'Tom Scholl & David Kerschner' - description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.' - publisher = 'Y Combinator' - category = 'news, programming, it, technology' - delay = 1 - max_articles_per_feed = 20 - oldest_article = 3 - use_embedded_content = False - no_stylesheets = True - encoding = 'utf-8' - language = 'en' - requires_version = (0, 8, 16) - - feeds = [ - (u'Hacker News Frontpage', 'https://hnrss.org/frontpage'), - (u'Ask Hacker News', 'https://hnrss.org/ask') - ] - - temp_files = [] - articles_are_obfuscated = True - - def get_readable_content(self, url): - self.log('get_readable_content(' + url + ')') - br = self.get_browser() - f = br.open(url) - html = f.read() - f.close() - - return self.extract_readable_article(html, url) - - def get_hn_content(self, url): - self.log('get_hn_content(' + url + ')') - soup = self.index_to_soup(url) - main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td - - title_element = main.select('td.title .titleline a')[0] - self.log('title_element=' + repr(title_element)) - title = self.tag_to_string(title_element) - self.log('title=' + title) - link = title_element['href'] - # link = main.find('td', 'title').find('a')['href'] - if link.startswith('item?'): - link = 'https://news.ycombinator.com/' + link - readable_link = link.rpartition('http://')[2].rpartition('https://')[2] - subtext = self.tag_to_string(main.find('td', 'subtext')) - - title_content_td = main.find('td', 'title').findParent( - 'tr').findNextSiblings('tr', limit=3)[2].findAll('td', limit=2)[1] - title_content = u'' - if not title_content_td.find('form'): - title_content_td.name = 'div' - title_content = title_content_td.prettify() - - comments = u'' - for td in main.findAll('td', 'default'): - comhead = td.find('span', 'comhead') - if comhead: - com_title = u'
' + readable_link + \
- u'
' + subtext + u'