Update Engadget

2025-07-09 03:04:10 -04:00 · 2023-09-30 08:00:25 +05:30 · 2023-09-30 08:00:25 +05:30 · 9098657e53
commit 9098657e53
parent 0872843a8b
1 changed files with 63 additions and 39 deletions
--- a/recipes/endgadget.recipe
+++ b/recipes/endgadget.recipe
@ -1,62 +1,86 @@
 #!/usr/bin/env python
-__license__ = 'GPL v3'
+__license__   = 'GPL v3'
 __copyright__ = 'Copyright 2011 Starson17'
 '''
 engadget.com
 '''
-from calibre.web.feeds.news import BasicNewsRecipe, classes
+from calibre.web.feeds.news import BasicNewsRecipe
 def classes(classes):
    q = frozenset(classes.split(' '))
    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
 class Engadget(BasicNewsRecipe):
-    title = u'Engadget'
+    title                 = u'Engadget'
-    __author__ = 'Starson17, modified by epubli'
+    __author__            = 'Starson17, modified by epubli'
-    __version__ = 'v2.0'
+    __version__           = 'v2.1'
-    __date__ = '14, Sep 2022'
+    __date__              = '2023-09-19'
-    description = 'Tech news'
+    description           = 'Tech news'
-    language = 'en'
+    language              = 'en'
-    oldest_article = 7
+    oldest_article        = 7
    max_articles_per_feed = 100
-    no_stylesheets = True
+    no_stylesheets        = True
-    use_embedded_content = False
+    use_embedded_content  = False
-    remove_javascript = True
+    remove_javascript     = True
-    remove_empty_feeds = True
+    remove_empty_feeds    = True
    compress_news_images = True
    scale_news_images_to_device = True
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/b/bb/Engadget-logo.svg'
-    keep_only_tags = [
+    keep_only_tags = [ 
-        dict(name='figure', attrs={'data-component': 'DefaultLede'}),
+        dict(name='div', attrs={'class':'caas-content-wrapper'}),
-        dict(name='div', attrs={'data-component': 'ArticleHeader'}),
+        dict(name='figure', attrs={'data-component':'DefaultLede'}),
-        dict(
+        dict(name='div', attrs={'data-component':'ArticleHeader'}),
-            name='div',
+        dict(name='div', attrs={'class':['article-text','article-text c-gray-1 no-review']}),
            attrs={'class': ['article-text', 'article-text c-gray-1 no-review']}
        ),
        dict(name='figure')
    ]
    remove_tags = [
-        dict(name='div', attrs={'data-component': 'ArticleAuthorInfo'}),
+        dict(name='div', attrs={'class':'caas-content-byline-wrapper'}),
-        classes('notification-upsell-push article-slideshow D(f) rapid-with-clickid athena-button')
+        dict(name='div', attrs={'data-component':'ArticleAuthorInfo'}),
        classes('caas-3p-blocked commerce-disclaimer notification-upsell-push article-slideshow athena-button email-form')
    ]
    feeds = [(u'Posts', u'https://www.engadget.com/rss.xml')]
    def parse_feeds(self):
-        # Call parent's method.
+      # Call parent's method.
-        feeds = BasicNewsRecipe.parse_feeds(self)
+      feeds = BasicNewsRecipe.parse_feeds(self)
-        # Loop through all feeds.
+      # Loop through all feeds.
-        for feed in feeds:
+      for feed in feeds:
-            # Loop through all articles in feed.
+        # Loop through all articles in feed.
-            for article in feed.articles[:]:
+        for article in feed.articles[:]:
-                # Remove articles with '...' in the title.
+          # Remove articles with '...' in the title.
-                if 'best tech deals' in article.title:
+          if 'best tech deals' in article.title:
-                    print('Removing:', article.title)
+              print('Removing:',article.title)
-                    feed.articles.remove(article)
+              feed.articles.remove(article)
-                elif 'Podcast' in article.title:
+          elif 'Podcast' in article.title:
-                    print('Removing:', article.title)
+              print('Removing:',article.title)
-                    feed.articles.remove(article)
+              feed.articles.remove(article)
-                elif 'The Morning After' in article.title:
+          elif 'The Morning After' in article.title:
-                    print('Removing:', article.title)
+              print('Removing:',article.title)
-                    feed.articles.remove(article)
+              feed.articles.remove(article)
-        return feeds
+      return feeds
    def preprocess_html(self, soup):
        for attr in 'data-src data-src-mobile'.split():
            for img in soup.findAll('img'):
                try:
                    ds = img[attr].split()[0]
                    del img[attr]
                except KeyError:
                    continue
                if ds:
                    img['src'] = ds
        for divs in soup.findAll('div'):
            try:
                if divs['style'].split()[0].startswith('padding'):
                    print('Removing padding')
                    del divs['style']
            except KeyError:
                continue
        return soup