From 6f3baa43575d0e5e26f9a1ca4f8bbfed06c22cb4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Mar 2011 16:49:49 -0600 Subject: [PATCH] Caijing Magazine by Eric Chen --- recipes/caijing.recipe | 79 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 recipes/caijing.recipe diff --git a/recipes/caijing.recipe b/recipes/caijing.recipe new file mode 100644 index 0000000000..34e6c1e8a9 --- /dev/null +++ b/recipes/caijing.recipe @@ -0,0 +1,79 @@ +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + + +class Caijing(BasicNewsRecipe): + + title = 'Caijing Magazine' + __author__ = 'Eric Chen' + + description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING + Magazine has firmly established itself as a news authority and leading voice for + business and financial issues in China. + CAIJING Magazine closely tracks the most important aspects of China's economic reforms, + developments and policy changes, as well as major events in the capital markets. It also + offers a broad international perspective through first-hand reporting on international + political and economic issues. + CAIJING Magazine is China's most widely read business and finance magazine, with a + circulation of 225,000 per issue. It boasts top-level readers from government, business + and academic circles. ''' + language = 'zh' + category = 'news, China' + encoding = 'UTF-8' + timefmt = ' [%a, %d %b, %Y]' + needs_subscription = True + + remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav', + 'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment', + 'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}), + dict(name=['script', 'noscript', 'style'])] + no_stylesheets = True + remove_javascript = True + current_issue_url = "" + current_issue_cover = "" + + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://service.caijing.com.cn/usermanage/login') + br.select_form(name='mainLoginForm') + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + articles = [] + soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/') + div = soup0.find('div', attrs={'class':'fmcon'}) + link = div.find('a', href=True) + current_issue_url = link['href'] + + soup = self.index_to_soup(current_issue_url) + + for div_cover in soup.findAll('img', {'src' : re.compile('.')}): + if re.search('\d{4}-\d{2}-\d{2}', div_cover['src']): + self.current_issue_cover = div_cover['src'] + + feeds = [] + for section in soup.findAll('div', attrs={'class':'cebd'}): + section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'})) + articles = [] + for post in section.findAll('a', href=True): + if re.search('\d{4}-\d{2}-\d{2}', post['href']): + date = re.search('\d{4}-\d{2}-\d{2}', post['href']).group(0) + id = re.search('\d{9}', post['href']).group(0) + url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', post['href']) + url = url + id + '&time=' + date + '&cl=106&page=all' + + title = self.tag_to_string(post) + articles.append({'title':title, 'url':url, 'date':date}) + + if articles: + feeds.append((section_title, articles)) + return feeds + + def get_cover_url(self): + return self.current_issue_cover +