mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
62 lines
2.1 KiB
Python
62 lines
2.1 KiB
Python
#!/usr/bin/env python
|
|
from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
|
|
|
|
|
|
class bar(BasicNewsRecipe):
    """Calibre news recipe for Bar and Bench (https://www.barandbench.com/).

    No RSS feed is used: ``parse_index`` scrapes the site's front page and
    groups the article links found there by their leading URL path segment
    (``news``, ``columns``, ...).
    """

    title = 'Bar and Bench'
    __author__ = 'unkn0wn'
    description = (
        'Bar & Bench is the premier online portal for Indian legal news. News, interviews,'
        ' and columns related to the Supreme Court of India and the High Courts are published.'
    )
    language = 'en_IN'
    masthead_url = 'https://gumlet.assettype.com/barandbench/2019-12/7a743b15-5d5d-44d7-96c2-13616780ed95/brand_2x.png'

    # Drop the site's own styling, scripts and fixed image dimensions.
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['height', 'width', 'style']

    # Keep only the story header, hero image and story body. The site's class
    # names carry a suffix after the trailing ``__``, hence prefix matching
    # (NOTE(review): assumes prefixed_classes matches by class-name prefix --
    # see the calibre recipe API docs).
    keep_only_tags = [
        prefixed_classes(
            'text-story-m_header-details__ text-story-m_hero-image__ text-story-m_story-content-inner-wrapper__'
        )
    ]

    # Strip the tag list, the story-footer widget and inline SVGs.
    remove_tags = [
        prefixed_classes(
            'text-story-m_story-tags__ story-footer-module__metype__'
        ),
        dict(name='svg')
    ]

    def preprocess_html(self, soup):
        """Point every ``<img>`` that has a ``data-src`` at that URL.

        Args:
            soup: Parsed article document; modified in place.

        Returns:
            The same ``soup`` object.
        """
        # Presumably the site lazy-loads images and keeps the real URL in
        # ``data-src``; copy it into ``src`` so the image gets downloaded.
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup

    ignore_duplicate_articles = {'title'}
    resolve_internal_links = True
    remove_empty_feeds = True

    def parse_index(self):
        """Build the feed list by scraping section links off the front page.

        Every ``<a>`` on the front page whose ``href`` starts with
        ``<index><section>/`` is treated as an article of that section. The
        query string is dropped from the URL. Links to the section landing
        page itself, links without any visible text, and repeated URLs within
        a section are skipped.

        Returns:
            A list of ``(section_title, articles)`` tuples in the order of
            ``sections``; each article is a dict with ``title`` and ``url``
            keys. Sections without any article are left out.
        """
        index = 'https://www.barandbench.com/'
        sections = [
            'news', 'columns', 'interviews', 'law-firms', 'apprentice-lawyer', 'legal-jobs'
        ]
        feeds = []
        soup = self.index_to_soup(index)
        # Walk the document once instead of once per section; document order
        # is preserved, so articles come out in the same order as before.
        anchors = soup.findAll('a', attrs={'href': True})
        for sec in sections:
            section = sec.capitalize()
            self.log(section)
            prefix = index + sec + '/'
            seen_urls = set()
            articles = []
            for a in anchors:
                href = a['href']
                if not href.startswith(prefix):
                    continue
                url = href.split('?')[0]
                # A bare prefix is the section landing page, not an article.
                if url == prefix:
                    continue
                title = self.tag_to_string(a)
                # Links without text (e.g. icon-only links) would otherwise
                # become untitled entries.
                if not title or not title.strip():
                    continue
                # The front page may link the same story several times
                # (thumbnail, headline, teaser); download it only once.
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                self.log('\t', title, '\n\t\t', url)
                articles.append({'title': title, 'url': url})
            if articles:
                feeds.append((section, articles))
        return feeds