mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Hindu Businees Line by Dhiru
This commit is contained in:
parent
9b8edf21a2
commit
b3007d8410
53
resources/recipes/hindu_business_line.recipe
Normal file
53
resources/recipes/hindu_business_line.recipe
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TheHindu(BasicNewsRecipe):
|
||||||
|
title = u'The Business Line'
|
||||||
|
language = 'en_IN'
|
||||||
|
|
||||||
|
oldest_article = 7
|
||||||
|
__author__ = 'Dhiru'
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
remove_tags_before = {'name':'font', 'class':'storyhead'}
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<!-- story ends -->.*', re.DOTALL),
|
||||||
|
lambda match: '</body></html>'),
|
||||||
|
]
|
||||||
|
extra_css = '''
|
||||||
|
.storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
|
||||||
|
body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
|
||||||
|
'''
|
||||||
|
feeds = [
|
||||||
|
(u'Main - Latest News', u'http://www.thehindubusinessline.com/rss/blnus.xml'),
|
||||||
|
(u'Main - Front Page', u'http://www.thehindubusinessline.com/rss/14hdline.xml'),
|
||||||
|
(u'Main - Corporate', u'http://www.thehindubusinessline.com/rss/02hdline.xml'),
|
||||||
|
(u'Main - Market', u'http://www.thehindubusinessline.com/rss/05hdline.xml'),
|
||||||
|
(u'Main - Opinion', u'http://www.thehindubusinessline.com/rss/04hdline.xml'),
|
||||||
|
(u'Main - Infotech', u'http://www.thehindubusinessline.com/rss/15hdline.xml'),
|
||||||
|
(u'Main - Marketing', u'http://www.thehindubusinessline.com/rss/19hdline.xml'),
|
||||||
|
(u'Main - Money & banking',
|
||||||
|
u'http://www.thehindubusinessline.com/rss/06hdline.xml'),
|
||||||
|
(u'Main - Agri & Commodities', u'http://www.thehindubusinessline.com/rss/07hdline.xml'),
|
||||||
|
(u'Industry',
|
||||||
|
u'http://www.thehindubusinessline.com/rss/03hdline.xml'),
|
||||||
|
(u'Logistic',
|
||||||
|
u'http://www.thehindubusinessline.com/rss/09hdline.xml'),
|
||||||
|
(u'Result', u'http://www.thehindubusinessline.com/rss/26hdline.xml'),
|
||||||
|
(u'Government',
|
||||||
|
u'http://www.thehindubusinessline.com/rss/27hdline.xml'),
|
||||||
|
(u'Investment World',
|
||||||
|
u'http://www.thehindubusinessline.com/rss/iw20hdline.xml'),
|
||||||
|
(u'Supplement - Life',
|
||||||
|
u'http://www.thehindubusinessline.com/rss/lf10hdline.xml')
|
||||||
|
]
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
for t in soup.findAll(['table', 'tr', 'td','center']):
|
||||||
|
t.name = 'div'
|
||||||
|
return soup
|
@ -320,6 +320,7 @@ class MobiReader(object):
|
|||||||
from lxml.html import soupparser
|
from lxml.html import soupparser
|
||||||
self.log.warning('Malformed markup, parsing using BeautifulSoup')
|
self.log.warning('Malformed markup, parsing using BeautifulSoup')
|
||||||
try:
|
try:
|
||||||
|
self.processed_html = self.processed_html.replace('</</', '</')
|
||||||
root = soupparser.fromstring(self.processed_html)
|
root = soupparser.fromstring(self.processed_html)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.warning('MOBI markup appears to contain random bytes. Stripping.')
|
self.log.warning('MOBI markup appears to contain random bytes. Stripping.')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user