mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
New recipe for DNA: India by Kovid Goyal. Also updated Outlook India recipe to work with EPUB output
This commit is contained in:
parent
6d5648fbdc
commit
ffee7f8da1
@ -27,7 +27,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
|
||||
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
|
||||
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
|
||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star',
|
||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
41
src/calibre/web/feeds/recipes/recipe_dna.py
Normal file
41
src/calibre/web/feeds/recipes/recipe_dna.py
Normal file
@ -0,0 +1,41 @@
|
||||
'''
|
||||
dnaindia.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DNAIndia(BasicNewsRecipe):
    # Recipe for dnaindia.com: declares the RSS feeds to fetch, rewrites
    # article links to the site's print page and flattens table markup in
    # the downloaded HTML.

    title = 'DNA India'
    description = 'Mumbai news, India news, World news, breaking news'
    __author__ = 'Kovid Goyal'
    language = _('English')

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
        ('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
        ('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
        ('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
        ('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
        ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
        ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
        ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
        ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
        ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
    ]

    # Attribute filters (by id and by class) consumed by BasicNewsRecipe;
    # presumably the matching elements are stripped from each page --
    # confirm against the BasicNewsRecipe documentation.
    remove_tags = [{'id':'footer'}, {'class':['bottom', 'categoryHead']}]

    def print_version(self, url):
        '''
        Return the print-page URL for the article at `url`.

        The numeric ``newsid`` query value is pulled out of `url` and
        appended to the ``dnaprint.asp`` endpoint. When `url` carries no
        ``newsid=<digits>`` component it is returned unchanged.
        '''
        found = re.search(r'newsid=(\d+)', url)
        if found:
            return 'http://www.dnaindia.com/dnaprint.asp?newsid='+found.group(1)
        return url

    def postprocess_html(self, soup, first_fetch):
        '''
        Clean up the parsed page `soup` and return it.

        Every ``table``, ``tr`` and ``td`` tag is renamed to ``div``, and
        the parent of the first element linking to
        http://www.3dsyndication.com/ is removed from the tree, if such a
        link exists. `first_fetch` is not used here.
        '''
        # NOTE(review): presumably tables are flattened so the output
        # formats need not lay out table markup -- confirm.
        for cell in soup.findAll(['table', 'tr', 'td']):
            cell.name = 'div'

        syndication_link = soup.find(href='http://www.3dsyndication.com/')
        if syndication_link is None:
            return soup
        syndication_link.parent.extract()
        return soup
|
@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe):
|
||||
|
||||
title = 'Outlook India'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Weekly news magazine focussed on India.'
|
||||
description = 'Weekly news magazine focused on India.'
|
||||
language = _('English')
|
||||
recursions = 1
|
||||
match_regexp = r'full.asp.*&pn=\d+'
|
||||
html2lrf_options = ['--ignore-tables']
|
||||
|
||||
remove_tags = [
|
||||
dict(name='img', src="images/space.gif"),
|
||||
@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe):
|
||||
bad.append(table)
|
||||
for b in bad:
|
||||
b.extract()
|
||||
soup = soup.findAll('html')[0]
|
||||
for t in soup.findAll(['table', 'tr', 'td']):
|
||||
t.name = 'div'
|
||||
return soup
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user