Telegraph India and Live Mint by Krittika Goyal

2025-07-09 03:04:10 -04:00 · 2012-01-31 16:52:35 +05:30 · 2012-01-31 16:52:35 +05:30 · 205d323bf7
commit 205d323bf7
parent b1092c7aaa
3 changed files with 58 additions and 36 deletions
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -1,41 +1,27 @@
-#!/usr/bin/env  python
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
-'''
-www.livemint.com
-'''
-
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class LiveMint(BasicNewsRecipe):
-    title                 = u'Livemint'
-    __author__            = 'Darko Miletic'
-    description           = 'The Wall Street Journal'
-    publisher             = 'The Wall Street Journal'
-    category              = 'news, games, adventure, technology'
-    language = 'en'
+    title          = u'Live Mint'
+    language       = 'en_IN'
+    __author__     = 'Krittika Goyal'
+    #encoding = 'cp1252'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    use_embedded_content = True

-    oldest_article        = 15
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    encoding              = 'utf-8'
-    use_embedded_content  = False
-    extra_css             = ' #dvArtheadline{font-size: x-large} #dvArtAbstract{font-size: large} '
+    no_stylesheets = True
+    auto_cleanup = True

-    keep_only_tags = [dict(name='div', attrs={'class':'innercontent'})]

-    remove_tags = [dict(name=['object','link','embed','form','iframe'])]
+    feeds          = [
+('Latest News', 
+ 'http://www.livemint.com/StoryRss.aspx?LN=Latestnews'),
+ ('Gallery', 
+ 'http://www.livemint.com/GalleryRssfeed.aspx'),
+ ('Top Stories', 
+ 'http://www.livemint.com/StoryRss.aspx?ts=Topstories'),
+ ('Banking', 
+ 'http://www.livemint.com/StoryRss.aspx?Id=104'),
+]

-    feeds = [(u'Articles', u'http://www.livemint.com/SectionRssfeed.aspx?Mid=1')]
-
-    def print_version(self, url):
-        link = url
-        msoup = self.index_to_soup(link)
-        mlink = msoup.find(attrs={'id':'ctl00_bodyplaceholdercontent_cntlArtTool_printUrl'})
-        if mlink:
-           link = 'http://www.livemint.com/Articles/' + mlink['href'].rpartition('/Articles/')[2]
-        return link
-
-    def preprocess_html(self, soup):
-        return self.adeify_images(soup)
--- a/recipes/telegraph_in.recipe
+++ b/recipes/telegraph_in.recipe
@@ -0,0 +1,37 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Telegraph(BasicNewsRecipe):
+    title          = u'The Telegraph India'
+    language       = 'en_IN'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    use_embedded_content = False
+
+    no_stylesheets = True
+    auto_cleanup = True
+
+
+    feeds          = [
+('Front Page',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=3'),
+ ('Nation',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=4'),
+ ('Calcutta',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=5'),
+ ('Bengal',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=8'),
+ ('Bihar',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=22'),
+ ('Sports',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=7'),
+ ('International',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=13'),
+ ('Business',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=9'),
+ ('Entertainment',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=20'),
+ ('Opinion',
+ 'http://www.telegraphindia.com/feeds/rss.jsp?id=6'),
+]
+
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@@ -21,8 +21,7 @@ NS = 'http://calibre-ebook.com/recipe_collection'
 E = ElementMaker(namespace=NS, nsmap={None:NS})

 def iterate_over_builtin_recipe_files():
-    exclude = ['craigslist', 'toronto_sun',
-            'livemint']
+    exclude = ['craigslist', 'toronto_sun']
    d = os.path.dirname
    base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')
    for f in os.listdir(base):