mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New recipe for The Hindu by Kovid Goyal
This commit is contained in:
parent
20516bdaa1
commit
3d455a02ea
@ -190,7 +190,7 @@ class BasicNewsRecipe(object, LoggingInterface):
|
||||
#: For the format for specifying a tag see :attr:`BasicNewsRecipe.remove_tags`.
|
||||
#: For example::
|
||||
#:
|
||||
#: remove_tags_before = [dict(id='content')]
|
||||
#: remove_tags_before = dict(id='content')
|
||||
#:
|
||||
#: will remove all
|
||||
#: tags before the first element with `id="content"`.
|
||||
|
@ -29,6 +29,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
|
||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
||||
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
||||
'hindu'
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
47
src/calibre/web/feeds/recipes/recipe_hindu.py
Normal file
47
src/calibre/web/feeds/recipes/recipe_hindu.py
Normal file
@ -0,0 +1,47 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheHindu(BasicNewsRecipe):
    """Recipe that builds an edition of The Hindu from its hindu.com RSS feeds.

    Cleanup is done in three stages, all configured below: the raw markup is
    truncated at the ``<!-- story ends -->`` marker, everything before the
    ``<font class="storyhead">`` element is dropped, and the table markup
    that remains is flattened into ``div`` elements.
    """

    title = u'The Hindu'
    language = _('English')
    oldest_article = 7
    __author__ = _('Kovid Goyal')
    max_articles_per_feed = 100

    # Drop all tags that precede the first <font class="storyhead"> element
    # (presumably the story headline -- verify against a live article page).
    remove_tags_before = {'name': 'font', 'class': 'storyhead'}

    # Replace the "story ends" marker and everything after it (re.DOTALL lets
    # '.*' run across newlines to the end of the page) with closing tags so
    # the truncated document still ends with </body></html>.
    preprocess_regexps = [
        (re.compile(r'<!-- story ends -->.*', re.DOTALL),
         lambda match: '</body></html>'),
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
        (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
        (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
        (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
        (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
        (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
        (u'Main - Weather / Religion / Crossword / Cartoon',
         u'http://www.hindu.com/rss/10hdline.xml'),
        (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
        (u'Supplement - Literary Review',
         u'http://www.hindu.com/rss/lrhdline.xml'),
        (u'Supplement - Sunday Magazine',
         u'http://www.hindu.com/rss/maghdline.xml'),
        (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
        (u'Supplement - Business Review',
         u'http://www.hindu.com/rss/bizhdline.xml'),
        (u'Supplement - Book Review',
         u'http://www.hindu.com/rss/brhdline.xml'),
        (u'Supplement - Science & Technology',
         u'http://www.hindu.com/rss/setahdline.xml'),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename every ``table``, ``tr`` and ``td`` element to ``div``.

        :param soup: parsed article document; assumed to be a BeautifulSoup
                     tree since ``findAll`` is called on it. Modified in
                     place.
        :param first_fetch: not used by this recipe.
        :return: the same ``soup`` object that was passed in.
        """
        for t in soup.findAll(['table', 'tr', 'td']):
            # Only the tag name changes; attributes and children are kept.
            t.name = 'div'
        return soup
|
Loading…
x
Reference in New Issue
Block a user