mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Hindu
This commit is contained in:
parent
1eeb0c7c92
commit
09217dd851
@ -6,6 +6,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(
|
||||||
|
attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
class TheHindu(BasicNewsRecipe):
|
class TheHindu(BasicNewsRecipe):
|
||||||
title = u'The Hindu'
|
title = u'The Hindu'
|
||||||
language = 'en_IN'
|
language = 'en_IN'
|
||||||
@ -14,11 +20,19 @@ class TheHindu(BasicNewsRecipe):
|
|||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_attributes = ['style']
|
||||||
|
|
||||||
auto_cleanup = True
|
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='h1', attrs={'class': 'title'}),
|
||||||
|
classes('lead-img-cont mobile-author-cont'),
|
||||||
|
dict(id=lambda x: x and x.startswith('content-body-')),
|
||||||
|
]
|
||||||
|
|
||||||
extra_css = '.photo-caption { font-size: smaller }'
|
def preprocess_html(self, soup):
|
||||||
|
for img in soup.findAll('img', attrs={'data-src-template': True}):
|
||||||
|
img['src'] = img['data-src-template'].replace('BINARY/thumbnail', 'alternates/FREE_660')
|
||||||
|
return soup
|
||||||
|
|
||||||
def articles_from_soup(self, soup):
|
def articles_from_soup(self, soup):
|
||||||
ans = []
|
ans = []
|
||||||
@ -34,8 +48,11 @@ class TheHindu(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
self.log('\t\tFound article:', title)
|
self.log('\t\tFound article:', title)
|
||||||
self.log('\t\t\t', url)
|
self.log('\t\t\t', url)
|
||||||
ans.append({'title': title, 'url': url,
|
ans.append({
|
||||||
'description': '', 'date': ''})
|
'title': title,
|
||||||
|
'url': url,
|
||||||
|
'description': '',
|
||||||
|
'date': ''})
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
@ -64,21 +81,11 @@ class TheHindu(BasicNewsRecipe):
|
|||||||
|
|
||||||
def is_accepted_entry(self, entry):
|
def is_accepted_entry(self, entry):
|
||||||
# Those sections in the top nav bar that we will omit
|
# Those sections in the top nav bar that we will omit
|
||||||
omit_list = ['tp-tamilnadu',
|
omit_list = [
|
||||||
'tp-karnataka',
|
'tp-tamilnadu', 'tp-karnataka', 'tp-kerala', 'tp-andhrapradesh',
|
||||||
'tp-kerala',
|
'tp-telangana', 'tp-newdelhi', 'tp-mumbai', 'tp-otherstates',
|
||||||
'tp-andhrapradesh',
|
'tp-in-school', 'tp-metroplus', 'tp-youngworld', 'tp-fridayreview',
|
||||||
'tp-telangana',
|
'tp-downtown', 'tp-bookreview', 'tp-others']
|
||||||
'tp-newdelhi',
|
|
||||||
'tp-mumbai',
|
|
||||||
'tp-otherstates',
|
|
||||||
'tp-in-school',
|
|
||||||
'tp-metroplus',
|
|
||||||
'tp-youngworld',
|
|
||||||
'tp-fridayreview',
|
|
||||||
'tp-downtown',
|
|
||||||
'tp-bookreview',
|
|
||||||
'tp-others']
|
|
||||||
|
|
||||||
is_accepted = True
|
is_accepted = True
|
||||||
for omit_entry in omit_list:
|
for omit_entry in omit_list:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user