Merge branch 'master' of https://github.com/saurabhnanda/calibre

2025-07-09 03:04:10 -04:00 · 2024-09-04 13:58:53 +05:30 · 2024-09-04 13:58:53 +05:30 · fdb4f5feff
commit fdb4f5feff
parent d0556746f2 889a03008c
1 changed files with 13 additions and 8 deletions
--- a/recipes/hackernews.recipe
+++ b/recipes/hackernews.recipe
@ -15,13 +15,14 @@ import re
 class HNWithCommentsLink(BasicNewsRecipe):
-    title = 'HN With Comments Link'
+    title = 'HN With Actual Comments'
    __author__ = 'Tom Scholl & David Kerschner'
    description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
    publisher = 'Y Combinator'
    category = 'news, programming, it, technology'
    delay = 1
-    max_articles_per_feed = 30
+    max_articles_per_feed = 20
    oldest_article = 3
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
@ -29,7 +30,8 @@ class HNWithCommentsLink(BasicNewsRecipe):
    requires_version = (0, 8, 16)
    feeds = [
-        (u'Hacker News', 'https://news.ycombinator.com/rss')
+        (u'Hacker News Frontpage', 'https://hnrss.org/frontpage'),
        (u'Ask Hacker News', 'https://hnrss.org/ask')
    ]
    temp_files = []
@ -49,8 +51,10 @@ class HNWithCommentsLink(BasicNewsRecipe):
        soup = self.index_to_soup(url)
        main = soup.find('tr').findNextSiblings('tr', limit=2)[1].td
-        title = self.tag_to_string(main.find('td', 'title'))
+        title_element = main.select('td.title .titleline a')[0]
-        link = main.find('td', 'title').find('a')['href']
+        title = self.tag_to_string(title_element)
        link = title_element['href']
        # link = main.find('td', 'title').find('a')['href']
        if link.startswith('item?'):
            link = 'https://news.ycombinator.com/' + link
        readable_link = link.rpartition('http://')[2].rpartition('https://')[2]
@ -88,11 +92,12 @@ class HNWithCommentsLink(BasicNewsRecipe):
        return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'
    def parse_feeds(self):
        """Parse the configured feeds and remember the first feed's articles.

        Delegates to the parent class's ``parse_feeds`` and stores the
        ``articles`` of the first returned feed on ``self.hn_articles``.

        Returns the parent's result unchanged.
        """
        # The class defined in this recipe is HNWithCommentsLink; referencing
        # any other name here (e.g. 'HNWithCommentsLinkAlt') raises NameError.
        a = super(HNWithCommentsLink, self).parse_feeds()
        # NOTE(review): only a[0] is kept although `feeds` lists two feeds;
        # confirm whether articles of the second feed are needed here too.
        self.hn_articles = a[0].articles
        return a
    def get_obfuscated_article(self, url):
        self.log('get_obfuscated_article with url=' + url)
        if url.startswith('https://news.ycombinator.com'):
            content = self.get_hn_content(url)
        else:
@ -114,8 +119,8 @@ class HNWithCommentsLink(BasicNewsRecipe):
                if a.url == url:
                    article = a
-        content = re.sub(r'</body>\s*</html>\s*$', '', content) + \
+        # content = re.sub(r'</body>\s*</html>\s*$', '', content) + \
-            article.summary + '</body></html>'
+        #    article.summary + '</body></html>'
        if not isinstance(content, bytes):
            content = content.encode('utf-8')