mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix Times of India
This commit is contained in:
parent
6bf872b5be
commit
0f138cd43c
@ -9,11 +9,12 @@ class TimesOfIndia(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [{'class':['maintable12', 'prttabl']}]
|
remove_attributes = ['style']
|
||||||
|
keep_only_tags = [{'class':re.compile(r'maintable12|prttabl')}]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(style=lambda x: x and 'float' in x),
|
{'class':re.compile('tabsintbgshow|prvnxtbg')},
|
||||||
{'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
|
{'id':['fbrecommend', 'relmaindiv']}
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top Stories',
|
('Top Stories',
|
||||||
@ -41,6 +42,8 @@ class TimesOfIndia(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
|
# Times of India sometimes serves an ad page instead of the article,
|
||||||
|
# this code detects and circumvents that
|
||||||
url = BasicNewsRecipe.get_article_url(self, article)
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
if '/0Ltimesofindia' in url:
|
if '/0Ltimesofindia' in url:
|
||||||
url = url.partition('/0L')[-1]
|
url = url.partition('/0L')[-1]
|
||||||
@ -61,6 +64,3 @@ class TimesOfIndia(BasicNewsRecipe):
|
|||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return soup
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user