Mirror of https://github.com/kovidgoyal/calibre.git
Update Caijing

commit 225ebd4231
parent c497dc1097
@@ -1,38 +1,47 @@
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
+__license__ = 'GPL v3'
+
 
 class Caijing(BasicNewsRecipe):
 
-    title = 'Caijing Magazine'
-    __author__ = 'Eric Chen'
-
-    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
-    Magazine has firmly established itself as a news authority and leading voice for
-    business and financial issues in China.
-    CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
-    developments and policy changes, as well as major events in the capital markets. It also
-    offers a broad international perspective through first-hand reporting on international
-    political and economic issues.
-    CAIJING Magazine is China's most widely read business and finance magazine, with a
-    circulation of 225,000 per issue. It boasts top-level readers from government, business
-    and academic circles. '''
+    '''based on the recipe wrote by Eric Chen at 2011'''
+    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
+    title = 'Caijing Magazine'
+    description = '''
+    Founded in 1998, the fortnightly CAIJING Magazine has firmly established
+    itself as a news authority and leading voice for business and financial
+    issues in China.
+
+    CAIJING Magazine closely tracks the most important aspects of China's
+    economic reforms, developments and policy changes, as well as major events
+    in the capital markets. It also offers a broad international perspective
+    through first-hand reporting on international political and economic
+    issues.
+
+    CAIJING Magazine is China's most widely read business and finance magazine,
+    with a circulation of 225,000 per issue. It boasts top-level readers from
+    government, business and academic circles.'''
     language = 'zh'
-    category = 'news, China'
     encoding = 'UTF-8'
+    publisher = 'Caijing Magazine'
+    publication_type = 'magazine'
+    category = 'news, Business, China'
     timefmt = ' [%a, %d %b, %Y]'
     needs_subscription = True
 
-    remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
-        'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
-        'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
-        dict(name=['script', 'noscript', 'style'])]
+    remove_tags = [dict(attrs={'class':['head_nav', 'mcont_logo', 'header',
+        'bottom','footer', 'magazine_ipad','cjartShare', 'ar_about',
+        'main_rt', 'mcont_nav', 'new']}),
+        dict(attrs={'id': ['articlePl']}),
+        dict(name=['script', 'noscript', 'style'])]
     no_stylesheets = True
    remove_javascript = True
     current_issue_url = ""
     current_issue_cover = ""
 
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         if self.username is not None and self.password is not None:
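The rewritten remove_tags list above drops page chrome by CSS class, one container by id ('articlePl'), and all script/noscript/style tags. Below is a minimal standalone sketch of how such dict filters behave, using bs4 in place of calibre's bundled BeautifulSoup; the HTML snippet is invented for illustration and this is not calibre's own cleanup code.

# Standalone sketch: each remove_tags dict can be passed straight to
# BeautifulSoup, which then drops every matching element from the tree.
from bs4 import BeautifulSoup

remove_tags = [
    dict(attrs={'class': ['head_nav', 'mcont_logo', 'header', 'bottom',
                          'footer', 'magazine_ipad', 'cjartShare', 'ar_about',
                          'main_rt', 'mcont_nav', 'new']}),
    dict(attrs={'id': ['articlePl']}),
    dict(name=['script', 'noscript', 'style']),
]

# Invented markup standing in for a Caijing article page.
html = ('<div class="head_nav">menu</div>'
        '<div id="articlePl">related links</div>'
        '<script>track()</script>'
        '<p>article text</p>')
soup = BeautifulSoup(html, 'html.parser')

for spec in remove_tags:
    for tag in soup.find_all(spec.get('name'), attrs=spec.get('attrs', {})):
        tag.decompose()

print(soup)  # only <p>article text</p> is left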
@@ -44,31 +53,34 @@ class Caijing(BasicNewsRecipe):
         return br
 
     def parse_index(self):
-        articles = []
-        soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
-        div = soup0.find('div', attrs={'class':'fmcon'})
-        link = div.find('a', href=True)
-        current_issue_url = link['href']
+        soup_start = self.index_to_soup('http://magazine.caijing.com.cn/')
+        jumpurl = soup_start.find('script').contents[0].split()
+        for line in jumpurl:
+            if 'http' in line.lower():
+                issuesurl = line.split('"')[1]
+                break
+
+        soup_issues = self.index_to_soup(issuesurl)
+        # find the latest issue
+        div = soup_issues.find('div', attrs={'class':'fmcon'})
+        current_issue_url = div.find('a', href=True)['href']
 
         soup = self.index_to_soup(current_issue_url)
-
-        for div_cover in soup.findAll('img', {'src' : re.compile('.')}):
-            if re.search('\d{4}-\d{2}-\d{2}', div_cover['src']):
-                self.current_issue_cover = div_cover['src']
+        coverimg = soup.find('div', {'class': 'zzfm_img'})
+        self.current_issue_cover = coverimg.find('img')['src']
 
         feeds = []
-        for section in soup.findAll('div', attrs={'class':'cebd'}):
-            section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
+        for section in soup.findAll('div',
+                attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
+            section_title = self.tag_to_string(section.find('div',
+                attrs={'class':re.compile(r'(lmnav_bt|zzlm_bt)1?$')}))
+            self.log('Found section:', section_title)
             articles = []
-            for post in section.findAll('a', href=True):
-                if re.search('\d{4}-\d{2}-\d{2}', post['href']):
-                    date = re.search('\d{4}-\d{2}-\d{2}', post['href']).group(0)
-                    id = re.search('\d{9}', post['href']).group(0)
-                    url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', post['href'])
-                    url = url + id + '&time=' + date + '&cl=106&page=all'
-
-                    title = self.tag_to_string(post)
-                    articles.append({'title':title, 'url':url, 'date':date})
+            for post in section.findAll('div',
+                    attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
+                title = self.tag_to_string(post)
+                url = post.find('a')['href']
+                articles.append({'title': title, 'url': url, 'date': None})
 
             if articles:
                 feeds.append((section_title, articles))
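The new parse_index no longer hard-codes a yearly index page; it loads http://magazine.caijing.com.cn/ and follows the JavaScript redirect emitted in a script tag to reach the issues index. Below is a minimal sketch of that extraction step, with an invented redirect snippet and target path (the real page's markup may differ) and bs4 standing in for calibre's bundled BeautifulSoup.

# Sketch of the jump-URL extraction; the script body and the target URL
# below are made up for illustration.
from bs4 import BeautifulSoup

html = '''<html><head><script>
window.location.href="http://magazine.caijing.com.cn/cjindex/";
</script></head><body></body></html>'''

soup_start = BeautifulSoup(html, 'html.parser')
jumpurl = soup_start.find('script').contents[0].split()

issuesurl = None
for line in jumpurl:
    if 'http' in line.lower():
        # the redirect target is the first double-quoted field of the token
        issuesurl = line.split('"')[1]
        break

print(issuesurl)  # http://magazine.caijing.com.cn/cjindex/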
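The feeds loop now keys off class-name patterns rather than fixed class names, so the same code walks both the cover-story block and the regular sections. The sketch below shows how those regexes match on invented markup; the pairing of class names is assumed for illustration, not taken from the live site.

# Invented markup; class names follow the patterns used in the recipe.
import re
from bs4 import BeautifulSoup

html = '''
<div class="fmwz_ml">
  <div class="lmnav_bt">Cover Story</div>
  <div class="fmwz_bt"><a href="http://example.invalid/a1">Article one</a></div>
</div>
<div class="zzlm_nr2">
  <div class="zzlm_bt1">Economy</div>
  <div class="zzlm_nr_bt"><a href="http://example.invalid/a2">Article two</a></div>
</div>
'''
soup = BeautifulSoup(html, 'html.parser')

# 'fmwz_ml' and 'zzlm_nr2' match; the anchored $ keeps 'zzlm_nr_bt' out.
for section in soup.find_all('div', attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
    title_div = section.find('div', attrs={'class': re.compile(r'(lmnav_bt|zzlm_bt)1?$')})
    section_title = title_div.get_text(strip=True)
    for post in section.find_all('div', attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
        print(section_title, '->', post.a['href'])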