mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
fb3d163796
BIN
recipes/icons/scroll.png
Normal file
BIN
recipes/icons/scroll.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 298 B |
63
recipes/scroll.recipe
Normal file
63
recipes/scroll.recipe
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
class scroll(BasicNewsRecipe):
    """calibre news recipe for Scroll.in.

    Articles are listed from a Google News RSS search restricted to
    scroll.in URLs. Each feed entry is resolved to the real article page in
    ``get_obfuscated_article`` before calibre processes it.
    """

    title = 'Scroll.in'
    __author__ = 'unkn0wn'
    description = (
        'The leading destination for original reporting on news, politics, and culture in India. '
        'Our award-winning team of journalists brings readers insightful analysis and opinion on the day’s '
        'headlines alongside a fresh mix of features on music, books, and cinema.'
    )
    language = 'en_IN'
    masthead_url = 'https://scroll.in/static/assets/scroll-logo.0f68c78dd023e2598248ea107feba562.003.svg'

    no_stylesheets = True
    remove_javascript = True

    ignore_duplicate_articles = {'title', 'url'}
    remove_attributes = ['style', 'height', 'width']

    # Tells calibre to call get_obfuscated_article() for every feed entry.
    articles_are_obfuscated = True

    def get_obfuscated_article(self, url):
        """Download the article behind a feed entry URL to a temporary file.

        :param url: The entry URL taken from the feed.
        :return: Path of a temporary ``.html`` file holding the article page.

        The article is aborted (via ``self.abort_article``) when no link can
        be found on the intermediate page or when the link points into one
        of the skipped sections.
        """
        br = self.get_browser()
        try:
            br.open(url)
        except Exception as e:
            # Presumably the feed URL answers with a redirect that surfaces
            # here as an error carrying the target in its 'location' header
            # (TODO confirm). Anything without that header is a genuine
            # failure and is re-raised instead of being masked.
            hdrs = getattr(e, 'hdrs', None)
            location = hdrs.get('location') if hdrs is not None else None
            if not location:
                raise
            url = location

        soup = self.index_to_soup(url)
        link = soup.find('a', href=True)
        if link is None:
            self.log('Aborting Article, no link found at ', url)
            self.abort_article('no article link found')
        target = link['href']

        skip_sections = (  # add sections you want to skip
            '/video/', '/videos/', '/announcements/'
        )
        if any(section in target for section in skip_sections):
            self.log('Aborting Article ', target)
            self.abort_article('skipping video links')

        self.log('Downloading ', target)
        html = br.open(target).read()
        # The context manager closes the file even if the write fails; the
        # file itself persists so calibre can read it back by name.
        with PersistentTemporaryFile('.html') as pt:
            pt.write(html)
        return pt.name

    extra_css = '''
        .orange-tag, .article-meta-container { font-size:small; }
        .featured-image, .cms-block-image { text-align:center; font-size:small; }
    '''

    keep_only_tags = [
        dict(name='header'),
        classes('featured-image article-body')
    ]

    remove_tags = [classes('comments-entry-point-meta')]

    feeds = [('Articles', 'https://news.google.com/rss/search?q=when:27h+allinurl:https%3A%2F%2Fscroll.in&hl=en-IN&gl=IN&ceid=IN:en')]

    def populate_article_metadata(self, article, soup, first):
        """Fill in the article summary and tidy its title.

        The text of the first ``<h2>`` in the downloaded page is used as both
        ``summary`` and ``text_summary``, and the ' - Scroll.in' suffix is
        removed from the title.
        """
        # article.url = ''
        summary = self.tag_to_string(soup.find('h2'))
        article.summary = summary
        article.text_summary = summary
        article.title = article.title.replace(' - Scroll.in', '')
|
@ -15,7 +15,7 @@ class TheWashingtonPost(BasicNewsRecipe):
|
|||||||
description = 'Leading source for news, video and opinion on politics, business, world and national news, science, travel, entertainment and more. Our local coverage includes reporting on education, crime, weather, traffic, real estate, jobs and cars for DC, Maryland and Virginia. Offering award-winning opinion writing, entertainment information and restaurant reviews.' # noqa
|
description = 'Leading source for news, video and opinion on politics, business, world and national news, science, travel, entertainment and more. Our local coverage includes reporting on education, crime, weather, traffic, real estate, jobs and cars for DC, Maryland and Virginia. Offering award-winning opinion writing, entertainment information and restaurant reviews.' # noqa
|
||||||
publisher = 'The Washington Post Company'
|
publisher = 'The Washington Post Company'
|
||||||
category = 'news, politics, USA'
|
category = 'news, politics, USA'
|
||||||
oldest_article = 2
|
oldest_article = 1.2
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user