mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-11-24 15:25:01 -05:00
55 lines
2.2 KiB
Plaintext
55 lines
2.2 KiB
Plaintext
from __future__ import with_statement
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
|
|
import re
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class TheHindu(BasicNewsRecipe):
    """Recipe for 'The Hindu', built from the hindu.com RSS headline feeds."""

    # --- Identification -------------------------------------------------
    title = u'The Hindu'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en_IN'

    # --- Fetch limits and stylesheet handling ---------------------------
    # These are interpreted by BasicNewsRecipe; see its API documentation
    # for the exact meaning of each setting.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # --- Article clean-up -----------------------------------------------
    # The article is anchored on the <font class="storyhead"> element.
    remove_tags_before = {'name': 'font', 'class': 'storyhead'}

    # Everything from the "<!-- story ends -->" marker to the end of the
    # page (DOTALL lets '.' span newlines) is replaced by the closing
    # body/html tags, so the document stays well-formed after truncation.
    preprocess_regexps = [
        (
            re.compile(r'<!-- story ends -->.*', re.DOTALL),
            lambda _match: '</body></html>',
        ),
    ]

    # Styling for the story headline and the body text.
    extra_css = '''
    .storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
    body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
    '''

    # --- Feeds: (section title, RSS URL) pairs --------------------------
    feeds = [
        # Main paper
        (u'Main - Front Page',
         u'http://www.hindu.com/rss/01hdline.xml'),
        (u'Main - National',
         u'http://www.hindu.com/rss/02hdline.xml'),
        (u'Main - International',
         u'http://www.hindu.com/rss/03hdline.xml'),
        (u'Main - Opinion',
         u'http://www.hindu.com/rss/05hdline.xml'),
        (u'Main - Business',
         u'http://www.hindu.com/rss/06hdline.xml'),
        (u'Main - Sport',
         u'http://www.hindu.com/rss/07hdline.xml'),
        (u'Main - Weather / Religion / Crossword / Cartoon',
         u'http://www.hindu.com/rss/10hdline.xml'),
        (u'Main - Engagements',
         u'http://www.hindu.com/rss/26hdline.xml'),
        # Supplements
        (u'Supplement - Literary Review',
         u'http://www.hindu.com/rss/lrhdline.xml'),
        (u'Supplement - Sunday Magazine',
         u'http://www.hindu.com/rss/maghdline.xml'),
        (u'Supplement - Open Page',
         u'http://www.hindu.com/rss/ophdline.xml'),
        (u'Supplement - Business Review',
         u'http://www.hindu.com/rss/bizhdline.xml'),
        (u'Supplement - Book Review',
         u'http://www.hindu.com/rss/brhdline.xml'),
        (u'Supplement - Science & Technology',
         u'http://www.hindu.com/rss/setahdline.xml'),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename table-layout tags to ``div`` and return the soup.

        Every ``table``, ``tr``, ``td`` and ``center`` element found in
        *soup* has its tag name changed to ``div`` in place; the element's
        contents are not otherwise touched. *first_fetch* is accepted as
        part of the hook signature but is not used here.
        """
        layout_tags = ['table', 'tr', 'td','center']
        for element in soup.findAll(layout_tags):
            element.name = 'div'
        return soup
|