mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Business Week Magazine
This commit is contained in:
parent
766fb9c400
commit
4c9c7aad6f
@@ -5,31 +5,34 @@ from collections import OrderedDict
|
|||||||
class BusinessWeekMagazine(BasicNewsRecipe):
|
class BusinessWeekMagazine(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Business Week Magazine'
|
title = 'Business Week Magazine'
|
||||||
__author__ = 'Rick Shang'
|
__author__ = 'Rick Shang, Armin Geller' # AGE Upd 2014-01-18
|
||||||
|
|
||||||
description = 'A renowned business publication. Business news, trends and profiles of successful businesspeople.'
|
description = 'A renowned business publication. Business news, trends and profiles of successful businesspeople.'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
category = 'news'
|
category = 'news'
|
||||||
encoding = 'UTF-8'
|
encoding = 'UTF-8'
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':['article_body_container','story_body']}),
|
dict(name='div', attrs={'id':['content']}), # AGE 2014-01-18
|
||||||
]
|
]
|
||||||
remove_tags = [dict(name='ui'),dict(name='li'),dict(name='div', attrs={'id':['share-email']})]
|
remove_tags = [dict(name='hr'),
|
||||||
|
dict(name='a', attrs={'class':'sub_sales'}),
|
||||||
|
dict(name='div', attrs={'class':'fieldset'}),
|
||||||
|
dict(name='div', attrs={'id':'taboola_wrapper'})] # AGE 2014-01-18
|
||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
|
cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
#Go to the issue
|
# Go to the issue
|
||||||
soup = self.index_to_soup('http://www.businessweek.com/magazine/news/articles/business_news.htm')
|
soup = self.index_to_soup('http://www.businessweek.com/magazine/news/articles/business_news.htm')
|
||||||
|
|
||||||
#Find date
|
# Find date
|
||||||
mag=soup.find('h2',text='Magazine')
|
mag=soup.find('h2',text='Magazine')
|
||||||
dates=self.tag_to_string(mag.findNext('h3'))
|
dates=self.tag_to_string(mag.findNext('h3'))
|
||||||
self.timefmt = u' [%s]'%dates
|
self.timefmt = u' [%s]'%dates
|
||||||
|
|
||||||
#Go to the main body
|
# Go to the main body
|
||||||
div0 = soup.find('div', attrs={'class':'column left'})
|
div0 = soup.find('div', attrs={'class':'column left'})
|
||||||
section_title = ''
|
section_title = ''
|
||||||
feeds = OrderedDict()
|
feeds = OrderedDict()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user