Update The Economic Times India

This commit is contained in:
Kovid Goyal 2016-02-01 08:38:08 +05:30
parent bab3102d8b
commit cc39ed3ba1

View File

@ -16,7 +16,7 @@ class TheEconomicTimes(BasicNewsRecipe):
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False #use_embedded_content = False
simultaneous_downloads = 1 simultaneous_downloads = 1
encoding = 'utf-8' encoding = 'utf-8'
language = 'en_IN' language = 'en_IN'
@ -36,8 +36,15 @@ class TheEconomicTimes(BasicNewsRecipe):
'publisher' : publisher, 'publisher' : publisher,
'language' : language 'language' : language
} }
remove_tags_before = dict(name='h1') remove_tags_before = dict(name='article')
remove_tags_after = dict(name='div', attrs={'class':'storycontent'}) remove_tags_after = [dict(name='article')]
remove_tags = [dict(name='div', attrs={'class':'cmtLinks'}),
dict(name='div', attrs={'class':'raltedTopics'}),
dict(name='div', attrs={'class':'editorsPick'}),
dict(name='div', attrs={'class':'articleImg etSpecial'}),
dict(name='div', attrs={'class':'articleImg artAd'}),
dict(name='div', attrs={'class':'appPromotion'})
]
remove_attributes = ['xmlns'] remove_attributes = ['xmlns']
feeds = [(u'Top Stories', u'http://economictimes.indiatimes.com/rssfeedstopstories.cms'), feeds = [(u'Top Stories', u'http://economictimes.indiatimes.com/rssfeedstopstories.cms'),
(u'News', u'http://economictimes.indiatimes.com/News/rssfeeds/1715249553.cms'), (u'News', u'http://economictimes.indiatimes.com/News/rssfeeds/1715249553.cms'),
@ -48,17 +55,17 @@ class TheEconomicTimes(BasicNewsRecipe):
(u'Opinion', u'http://economictimes.indiatimes.com/opinion/opinionshome/rssfeeds/897228639.cms'), (u'Opinion', u'http://economictimes.indiatimes.com/opinion/opinionshome/rssfeeds/897228639.cms'),
(u'Features', u'http://economictimes.indiatimes.com/Features/etfeatures/rssfeeds/1466318837.cms'), (u'Features', u'http://economictimes.indiatimes.com/Features/etfeatures/rssfeeds/1466318837.cms'),
(u'Environment', u'http://economictimes.indiatimes.com/rssfeeds/2647163.cms'), (u'Environment', u'http://economictimes.indiatimes.com/rssfeeds/2647163.cms'),
(u'NRI', u'http://economictimes.indiatimes.com/rssfeeds/7771250.cms')] (u'NRI', u'http://economictimes.indiatimes.com/rssfeeds/7771250.cms')
]
# Uses the mobile print version. For web print version use 'http://economictimes.indiatimes.com/articleshow/<article_id>?prtpage=1'
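# Uses the web print version ('http://economictimes.indiatimes.com/articleshow/<article_id>?prtpage=1'). The older mobile print URL ('http://m.economictimes.com/PDAET/articleshow/<article_id>') is left commented out in print_version.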
def print_version(self, url): def print_version(self, url):
rest, sep, article_id = url.rpartition('/articleshow/') rest, sep, article_id = url.rpartition('/articleshow/')
return 'http://m.economictimes.com/PDAET/articleshow/' + article_id # return 'http://m.economictimes.com/PDAET/articleshow/' + article_id
return 'http://economictimes.indiatimes.com/articleshow/' + article_id+ '?prtpage=1'
def get_article_url(self, article): def get_article_url(self, article):
rurl = article.get('guid', None) rurl = article.get('link', None)
if (rurl.find('/quickieslist/') > 0) or (rurl.find('/quickiearticleshow/') > 0): if (rurl.find('/quickieslist/') > 0) or (rurl.find('/quickiearticleshow/') > 0):
return None return None
return rurl return rurl