Telegraph India and Live Mint by Krittika Goyal

This commit is contained in:
Kovid Goyal 2012-01-31 16:52:35 +05:30
parent b1092c7aaa
commit 205d323bf7
3 changed files with 58 additions and 36 deletions

View File

@@ -1,41 +1,27 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.livemint.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class LiveMint(BasicNewsRecipe):
# NOTE(review): this span is a rendered diff hunk (header: -1,41 +1,27) whose
# +/- markers and indentation were stripped, so lines from two revisions of
# the Livemint recipe are interleaved. As a result several attributes are
# assigned twice below: title, language, __author__, oldest_article,
# max_articles_per_feed, use_embedded_content, no_stylesheets and feeds.
# Which copy was removed and which was added cannot be read off this text;
# presumably the 'Krittika Goyal' / 'en_IN' lines are the added ones, since
# the commit message credits that author -- confirm against the commit.
title = u'Livemint'
__author__ = 'Darko Miletic'
description = 'The Wall Street Journal'
publisher = 'The Wall Street Journal'
category = 'news, games, adventure, technology'
language = 'en'
# Second assignment of title / language / __author__ (see note at top).
title = u'Live Mint'
language = 'en_IN'
__author__ = 'Krittika Goyal'
#encoding = 'cp1252'
oldest_article = 1 #days
max_articles_per_feed = 25
use_embedded_content = True
# Second assignment of the article limits (see note at top).
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
# CSS rules targeting the #dvArtheadline and #dvArtAbstract element ids.
extra_css = ' #dvArtheadline{font-size: x-large} #dvArtAbstract{font-size: large} '
no_stylesheets = True
auto_cleanup = True
# Tag filters: keep <div class="innercontent">, drop object/link/embed/form/iframe.
keep_only_tags = [dict(name='div', attrs={'class':'innercontent'})]
remove_tags = [dict(name=['object','link','embed','form','iframe'])]
# Feed list as (section title, RSS URL) pairs.
feeds = [
('Latest News',
'http://www.livemint.com/StoryRss.aspx?LN=Latestnews'),
('Gallery',
'http://www.livemint.com/GalleryRssfeed.aspx'),
('Top Stories',
'http://www.livemint.com/StoryRss.aspx?ts=Topstories'),
('Banking',
'http://www.livemint.com/StoryRss.aspx?Id=104'),
]
# Second assignment of feeds: a single 'Articles' feed (see note at top).
feeds = [(u'Articles', u'http://www.livemint.com/SectionRssfeed.aspx?Mid=1')]
# Return the print-version URL for an article URL.
# Loads `url` through self.index_to_soup and looks for the element whose id is
# 'ctl00_bodyplaceholdercontent_cntlArtTool_printUrl'. If it is present, the
# result is 'http://www.livemint.com/Articles/' plus whatever follows the last
# '/Articles/' in that element's href (the whole href when '/Articles/' does
# not occur in it). If the element is missing, `url` is returned unchanged.
def print_version(self, url):
link = url
msoup = self.index_to_soup(link)
mlink = msoup.find(attrs={'id':'ctl00_bodyplaceholdercontent_cntlArtTool_printUrl'})
if mlink:
link = 'http://www.livemint.com/Articles/' + mlink['href'].rpartition('/Articles/')[2]
return link
# Pass the parsed page through self.adeify_images and return its result.
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Telegraph(BasicNewsRecipe):
    # Recipe settings for The Telegraph (India). The sections to download
    # are listed in `feeds` below.

    # Identification
    title = u'The Telegraph India'
    __author__ = 'Krittika Goyal'
    language = 'en_IN'

    # Article selection limits
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # Content handling flags
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    # (section title, RSS feed URL) pairs, one per section
    feeds = [
        ('Front Page', 'http://www.telegraphindia.com/feeds/rss.jsp?id=3'),
        ('Nation', 'http://www.telegraphindia.com/feeds/rss.jsp?id=4'),
        ('Calcutta', 'http://www.telegraphindia.com/feeds/rss.jsp?id=5'),
        ('Bengal', 'http://www.telegraphindia.com/feeds/rss.jsp?id=8'),
        ('Bihar', 'http://www.telegraphindia.com/feeds/rss.jsp?id=22'),
        ('Sports', 'http://www.telegraphindia.com/feeds/rss.jsp?id=7'),
        ('International', 'http://www.telegraphindia.com/feeds/rss.jsp?id=13'),
        ('Business', 'http://www.telegraphindia.com/feeds/rss.jsp?id=9'),
        ('Entertainment', 'http://www.telegraphindia.com/feeds/rss.jsp?id=20'),
        ('Opinion', 'http://www.telegraphindia.com/feeds/rss.jsp?id=6'),
    ]

View File

@@ -21,8 +21,7 @@ NS = 'http://calibre-ebook.com/recipe_collection'
E = ElementMaker(namespace=NS, nsmap={None:NS})
def iterate_over_builtin_recipe_files():
exclude = ['craigslist', 'toronto_sun',
'livemint']
exclude = ['craigslist', 'toronto_sun']
d = os.path.dirname
base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')
for f in os.listdir(base):