New recipe for DNA: India by Kovid Goyal. Also updated Outlook India recipe to work with EPUB output

2025-08-11 09:13:57 -04:00 · 2009-02-08 22:29:39 -08:00 · 2009-02-08 22:29:39 -08:00 · ffee7f8da1
commit ffee7f8da1
parent 6d5648fbdc
3 changed files with 46 additions and 3 deletions
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -27,7 +27,7 @@ recipe_modules = ['recipe_' + r for r in (
           'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
           'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
           'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
-           'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star',
+           'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_dna.py
+++ b/src/calibre/web/feeds/recipes/recipe_dna.py
@@ -0,0 +1,41 @@
+'''
+dnaindia.com
+'''
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DNAIndia(BasicNewsRecipe):
+    
+    title       = 'DNA India'
+    description = 'Mumbai news, India news, World news, breaking news'
+    __author__  = 'Kovid Goyal'
+    language    = _('English')
+    
+    feeds       = [
+                   ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
+                   ('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
+                   ('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
+                   ('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
+                   ('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
+                   ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
+                   ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
+                   ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
+                   ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
+                   ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),                   
+                   ]
+    remove_tags = [{'id':'footer'}, {'class':['bottom', 'categoryHead']}]
+    
+    def print_version(self, url):
+        match = re.search(r'newsid=(\d+)', url)
+        if not match:
+            return url
+        return 'http://www.dnaindia.com/dnaprint.asp?newsid='+match.group(1)
+    
+    def postprocess_html(self, soup, first_fetch):
+        for t in soup.findAll(['table', 'tr', 'td']):
+            t.name = 'div'
+            
+        a = soup.find(href='http://www.3dsyndication.com/')
+        if a is not None:
+            a.parent.extract()
+        return soup
--- a/src/calibre/web/feeds/recipes/recipe_outlook_india.py
+++ b/src/calibre/web/feeds/recipes/recipe_outlook_india.py
@@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe):
    
    title = 'Outlook India'
    __author__  = 'Kovid Goyal'
-    description = 'Weekly news magazine focussed on India.'
+    description = 'Weekly news magazine focused on India.'
    language = _('English')
    recursions = 1
    match_regexp = r'full.asp.*&pn=\d+'
-    html2lrf_options = ['--ignore-tables']
    
    remove_tags = [
                   dict(name='img', src="images/space.gif"),
@@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe):
                bad.append(table)
        for b in bad:
            b.extract()
+        soup = soup.findAll('html')[0]
+        for t in soup.findAll(['table', 'tr', 'td']):
+            t.name = 'div' 
        return soup