New recipe for DNA: India by Kovid Goyal. Also updated Outlook India recipe to work with EPUB output

This commit is contained in:
Kovid Goyal 2009-02-08 22:29:39 -08:00
parent 6d5648fbdc
commit ffee7f8da1
3 changed files with 46 additions and 3 deletions

View File

@ -27,7 +27,7 @@ recipe_modules = ['recipe_' + r for r in (
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star',
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,41 @@
'''
dnaindia.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DNAIndia(BasicNewsRecipe):
    '''
    News recipe for DNA India (dnaindia.com), driven by the site's RSS
    syndication feeds.
    '''

    title = 'DNA India'
    description = 'Mumbai news, India news, World news, breaking news'
    __author__ = 'Kovid Goyal'
    language = _('English')

    # (section title, feed URL) pairs, in the order they are listed.
    feeds = [
        ('Top News',
         'http://www.dnaindia.com/syndication/rss_topnews.xml'),
        ('Popular News',
         'http://www.dnaindia.com/syndication/rss_popular.xml'),
        ('Recent Columns',
         'http://www.dnaindia.com/syndication/rss_column.xml'),
        ('Mumbai',
         'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
        ('India',
         'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
        ('World',
         'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
        ('Money',
         'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
        ('Sports',
         'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
        ('After Hours',
         'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
        ('Digital Life',
         'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
    ]

    # Elements matched by id / CSS class that are dropped from each page.
    remove_tags = [
        {'id': 'footer'},
        {'class': ['bottom', 'categoryHead']},
    ]

    def print_version(self, url):
        '''
        Map an article URL onto the site's ``dnaprint.asp`` page.

        The digits following ``newsid=`` in ``url`` are appended to the
        print page address. A URL that carries no ``newsid`` value is
        returned unchanged.
        '''
        found = re.search(r'newsid=(\d+)', url)
        if found:
            return ('http://www.dnaindia.com/dnaprint.asp?newsid='
                    + found.group(1))
        return url

    def postprocess_html(self, soup, first_fetch):
        '''
        Tidy the parsed page in place and return the same ``soup``.

        Every ``table``, ``tr`` and ``td`` tag is renamed to ``div``, and
        the element enclosing the link to http://www.3dsyndication.com/
        is removed when such a link is present. ``first_fetch`` is not
        consulted.
        '''
        # Renaming keeps the cell contents while dropping the table markup
        # (presumably so table layout does not affect the converted
        # output -- confirm against the target formats).
        for layout_tag in soup.findAll(['table', 'tr', 'td']):
            layout_tag.name = 'div'

        syndication_link = soup.find(href='http://www.3dsyndication.com/')
        if syndication_link is None:
            return soup

        # Remove the link's container, not just the link itself.
        syndication_link.parent.extract()
        return soup

View File

@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe):
title = 'Outlook India'
__author__ = 'Kovid Goyal'
description = 'Weekly news magazine focussed on India.'
description = 'Weekly news magazine focused on India.'
language = _('English')
recursions = 1
match_regexp = r'full.asp.*&pn=\d+'
html2lrf_options = ['--ignore-tables']
remove_tags = [
dict(name='img', src="images/space.gif"),
@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe):
bad.append(table)
for b in bad:
b.extract()
soup = soup.findAll('html')[0]
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
return soup