From 04cb2ec6c6378f05a9efa0bd7080a078029ae332 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 8 Jun 2025 13:23:24 +0530
Subject: [PATCH] Delete lwn_weekly.recipe duplicate

---
 recipes/lwn_weekly.recipe | 182 --------------------------------------
 1 file changed, 182 deletions(-)
 delete mode 100644 recipes/lwn_weekly.recipe

diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe
deleted file mode 100644
index 162078db8b..0000000000
--- a/recipes/lwn_weekly.recipe
+++ /dev/null
@@ -1,182 +0,0 @@
-#!/usr/bin/env python
-
-from __future__ import print_function
-
-__license__ = 'GPL v3'
-__copyright__ = '2011, Davide Cavalca '
-'''
-lwn.net
-'''
-
-import re
-import sys
-
-from calibre.utils.date import now as nowf
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class WeeklyLWN(BasicNewsRecipe):
-    title = 'LWN.net Weekly Edition'
-    description = 'Weekly summary of what has happened in the free software world.'
-    __author__ = 'Davide Cavalca'
-    language = 'en'
-    site_url = u'http://lwn.net'
-
-    no_stylesheets = True
-    remove_javascript = True
-    extra_css = '''pre,code,samp,kbd,tt { font-size: 80% }
-        blockquote {margin-left:0 }
-        DIV.BigQuote,SPAN { font-style:oblique }
-        DIV.FeatureByline,DIV.GaByline { background-color:#EEE }
-        DIV.tlr { background-color:#EED; border-style:groove; }
-        * { color: black }'''
-
-    cover_url = site_url + '/images/lcorner.png'
-    # masthead_url = 'http://lwn.net/images/lcorner.png'
-    publication_type = 'magazine'
-
-    keep_only_tags = [dict(attrs={'class': ['PageHeadline', 'ArticleText']})]
-    remove_tags = [dict(name=['h2', 'form'])]
-
-    preprocess_regexps = [
-        # Remove the <hr> and "Log in to post comments"
-        (re.compile(r'<hr [^>]+>\s*\n\s*.*?comments[)]'), lambda m: ''),
-    ]
-
-    conversion_options = {
-        'no_inline_navbars': True,
-    }
-
-    oldest_article = 7.0
-    needs_subscription = 'optional'
-
-    recipe_specific_options = {
-        'issue': {
-            'short': 'The ID of the edition to download',
-            'long': 'For example, 998950\nHint: The ID can be found within the edition URL'
-        }
-    }
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open('https://lwn.net/login')
-            br.select_form(name='loginform')
-            br['uname'] = self.username
-            br['pword'] = self.password
-            br.submit()
-        return br
-
-    def print_version(self, url):
-
-        # Strip off anchor
-        url = url.split('#')[0]
-
-        # Prepend site_url
-        if url[0:len(self.site_url)] != self.site_url:
-            url = self.site_url + url
-
-        # Append printable URL parameter
-        print_param = '?format=printable'
-        if url[-len(print_param):] != print_param:
-            url += print_param
-
-        return url
-
-    def publication_date(self):
-        return self.pub_date
-
-    def parse_publication_date(self, soup):
-        from dateutil.parser import ParserError, parse
-        try:
-            date_match = re.match(r'.* +for +([^\[]*)', self.tag_to_string(soup.head.title.string))
-            # dateutil.parser.parse() is considered thread-safe
-            self.pub_date = parse(date_match[1])
-        except (TypeError, ParserError):
-            self.log.warning('Failed to parse publication date from title: %r, using current time' % soup.head.title.string)
-            self.pub_date = nowf()
-
-    def parse_index(self):
-        past_edition = self.recipe_specific_options.get('issue')
-        if past_edition and isinstance(past_edition, str):
-            index_url = self.print_version(f'/Articles/{past_edition}/bigpage')
-        elif self.username is not None and self.password is not None:
-            index_url = self.print_version('/current/bigpage')
-        else:
-            index_url = self.print_version('/free/bigpage')
-        soup = self.index_to_soup(index_url)
-        self.parse_publication_date(soup)
-        curr = soup.body
-
-        articles = {}
-        ans = []
-
-        section = self.tag_to_string(soup.title)
-        subsection = None
-
-        while True:
-            curr = curr.findNext(
-                attrs={'class': ['SummaryHL', 'Cat1HL', 'Cat2HL']})
-
-            if curr is None:
-                break
-
-            text = self.tag_to_string(curr.contents[0])
-            cclass = ''.join(curr['class'])
-
-            if 'Cat2HL' in cclass:
-                subsection = text
-
-            elif 'Cat1HL' in cclass:
-                section = text
-                subsection = None
-
-            elif 'SummaryHL' in cclass:
-                article_title = text
-                if not article_title:
-                    article_title = _('Undefined article title')
-
-                if subsection:
-                    section_title = '%s: %s' % (section, subsection)
-                else:
-                    section_title = section
-
-                # Most articles have anchors in their titles, *except* the
-                # security vulnerabilities
-                article_anchor = curr.find(
-                    name='a', attrs={'href': re.compile(r'^/Articles/')})
-
-                if article_anchor:
-                    article_url = article_anchor.get('href')
-                    if not article_url:
-                        print('article_url is None for article_anchor "%s": "%s"'
-                              % (str(article_anchor), article_title), file=sys.stderr)
-                        continue
-
-                else:
-                    self.log.warn('article_anchor is None for "%s"; skipping' % article_title)
-                    article_url = None
-                    continue
-
-                if section_title not in articles:
-                    articles[section_title] = []
-                if section_title not in ans:
-                    ans.append(section_title)
-
-                articles[section_title].append({
-                    'url': article_url,
-                    'title': article_title,
-                    'description': '', 'content': '', 'date': '',
-                })
-
-            else:
-                self.log.error('lwn_weekly.recipe: something bad happened; should not be able to reach this')
-
-        ans = [(section2, articles[section2])
-               for section2 in ans if section2 in articles]
-        # from pprint import pprint
-        # pprint(ans)
-
-        return ans
-
-# vim: expandtab:ts=4:sw=4