Update TechCrunch

This commit is contained in:
Kovid Goyal 2024-06-20 11:54:27 +05:30
parent a904cc39a6
commit e8cb43bbac
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,60 +1,56 @@
__license__ = 'GPL v3' #!/usr/bin/env python
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>' # vim:fileencoding=utf-8
'''
techcrunch.com
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1718089036(BasicNewsRecipe):
class TechCrunch(BasicNewsRecipe): title = 'TechCrunch'
title = 'TechCrunch'
__author__ = 'Darko Miletic'
description = 'IT News'
publisher = 'AOL Inc.'
category = 'news, IT'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en' language = 'en'
remove_empty_feeds = True __author__ = 'Spicy Poison'
publication_type = 'newsportal' description = 'TechCrunch is an American global online newspaper focusing on topics regarding high-tech and startup companies.'
masthead_url = 'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/site-logo.png' publisher = 'Yahoo! Inc.'
extra_css = """ oldest_article = 30
body{font-family: Helvetica,Arial,sans-serif } max_articles_per_feed = 50
img{margin-bottom: 0.4em; display:block} ignore_duplicate_articles = {'title', 'url'}
""" encoding = 'utf-8'
masthead_url = 'https://aircoverpr.com/wp-content/uploads/2020/07/techcrunch-logo-png-4.png'
auto_cleanup = True
conversion_options = { feeds = [
'comment': description, 'tags': category, 'publisher': publisher, 'language': language ('TechCrunch', 'https://techcrunch.com/feed/'),
} #Categories
('Apps', 'https://techcrunch.com/category/apps/feed/'),
remove_tags = [dict(name=['meta', 'link'])] ('Artificial Intelligence', 'https://techcrunch.com/category/artificial-intelligence/feed/'),
remove_attributes = ['lang'] ('Biotech', 'https://techcrunch.com/category/biotech-health/feed/'),
keep_only_tags = [ ('Climate', 'https://techcrunch.com/category/climate/feed/'),
dict(name='h1', attrs={'class': 'headline'}), dict( ('Commerce', 'https://techcrunch.com/category/commerce/feed/'),
attrs={'class': ['author', 'post-time', 'body-copy']}) ('Crypto', 'https://techcrunch.com/category/cryptocurrency/feed/'),
] ('Enterprise', 'https://techcrunch.com/category/enterprise/feed/'),
('Fintech', 'https://techcrunch.com/category/fintech/feed'),
feeds = [(u'News', u'http://feeds.feedburner.com/TechCrunch/')] ('Fundraising', 'https://techcrunch.com/category/fundraising/feed/'),
('Gadgets', 'https://techcrunch.com/category/gadgets/feed/'),
def preprocess_html(self, soup): ('Gaming', 'https://techcrunch.com/category/gaming/feed/'),
for item in soup.findAll(style=True): ('Hardware', 'https://techcrunch.com/category/hardware/feed/'),
del item['style'] ('Media & Entertainment', 'https://techcrunch.com/category/media-entertainment/feed/'),
for item in soup.findAll('a'): ('Privacy', 'https://techcrunch.com/category/privacy/feed/'),
limg = item.find('img') ('Robotics', 'https://techcrunch.com/category/robotics/feed/'),
if item.string is not None: ('Security', 'https://techcrunch.com/category/security/feed/'),
str = item.string ('Social Media', 'https://techcrunch.com/category/social/feed/'),
item.replaceWith(str) ('Space', 'https://techcrunch.com/category/space/feed/'),
else: ('Startups', 'https://techcrunch.com/category/startups/feed/'),
if limg: ('Tech Policy & Government', 'https://techcrunch.com/category/government-policy/feed/'),
item.name = 'div' ('Transportation', 'https://techcrunch.com/category/transportation/feed/'),
item.attrs = [] ('Venture Capital', 'https://techcrunch.com/category/venture/feed/'),
else: #Tags
str = self.tag_to_string(item) ('Amazon', 'https://techcrunch.com/tag/amazon/feed/'),
item.replaceWith(str) ('Apple', 'https://techcrunch.com/tag/apple/feed/'),
for item in soup.findAll('img', alt=False): ('Cloud Computing', 'https://techcrunch.com/tag/cloud-computing/feed/'),
item['alt'] = 'image' ('Electric Vehicles', 'https://techcrunch.com/tag/evs/feed/'),
return soup ('Google', 'https://techcrunch.com/tag/google/feed/'),
('Instagram', 'https://techcrunch.com/tag/instagram/feed/'),
('Layoffs', 'https://techcrunch.com/tag/layoffs/feed/'),
('Meta', 'https://techcrunch.com/tag/meta/feed/'),
('Microsoft', 'https://techcrunch.com/tag/microsoft/feed/'),
('TikTok', 'https://techcrunch.com/tag/tiktok/feed/'),
#Other
('Events Archive', 'https://techcrunch.com/events/feed/'),
]