mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Business Week
This commit is contained in:
parent
42561905d7
commit
9161d924aa
@@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
|
|||||||
, 'language' : language
|
, 'language' : language
|
||||||
}
|
}
|
||||||
|
|
||||||
#remove_tags = [
|
|
||||||
#dict(attrs={'class':'inStory'})
|
|
||||||
#,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
|
|
||||||
#,dict(attrs={'id':['inset','videoDisplay']})
|
|
||||||
#]
|
|
||||||
#keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
|
|
||||||
remove_attributes = ['lang']
|
|
||||||
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
|
(u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
|
||||||
(u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ),
|
|
||||||
(u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
|
|
||||||
(u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
|
|
||||||
(u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
|
|
||||||
(u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
|
|
||||||
(u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
|
|
||||||
(u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
|
|
||||||
(u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
|
|
||||||
(u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
|
|
||||||
(u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
|
|
||||||
(u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
|
|
||||||
(u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
|
|
||||||
(u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
|
|
||||||
(u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
|
|
||||||
(u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
|
|
||||||
(u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
|
|
||||||
(u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
|
|
||||||
(u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
|
|
||||||
(u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
url = article.get('guid', None)
|
|
||||||
if 'podcasts' in url:
|
|
||||||
return None
|
|
||||||
if 'surveys' in url:
|
|
||||||
return None
|
|
||||||
if 'images' in url:
|
|
||||||
return None
|
|
||||||
if 'feedroom' in url:
|
|
||||||
return None
|
|
||||||
if '/magazine/toc/' in url:
|
|
||||||
return None
|
|
||||||
rurl, sep, rest = url.rpartition('?')
|
|
||||||
if rurl:
|
|
||||||
return rurl
|
|
||||||
return rest
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
if '/news/' in url or '/blog/ in url':
|
soup = self.index_to_soup(url)
|
||||||
return url
|
prntver = soup.find('li', attrs={'class':'print tracked'})
|
||||||
rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
|
rurl = prntver.find('a', href=True)['href']
|
||||||
return rurl.replace('/investing/','/investor/')
|
return rurl
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
for alink in soup.findAll('a'):
|
|
||||||
if alink.string is not None:
|
|
||||||
tstr = alink.string
|
|
||||||
alink.replaceWith(tstr)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user