Mirror of https://github.com/kovidgoyal/calibre.git
Update Caijing

commit 225ebd4231
parent c497dc1097
@@ -1,38 +1,47 @@
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
+__license__ = 'GPL v3'
+
 
 class Caijing(BasicNewsRecipe):
 
-    title = 'Caijing Magazine'
-    __author__ = 'Eric Chen'
-
-    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
-    Magazine has firmly established itself as a news authority and leading voice for
-    business and financial issues in China.
-    CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
-    developments and policy changes, as well as major events in the capital markets. It also
-    offers a broad international perspective through first-hand reporting on international
-    political and economic issues.
-    CAIJING Magazine is China's most widely read business and finance magazine, with a
-    circulation of 225,000 per issue. It boasts top-level readers from government, business
-    and academic circles. '''
+    '''based on the recipe wrote by Eric Chen at 2011'''
+    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
+    title = 'Caijing Magazine'
+    description = '''
+    Founded in 1998, the fortnightly CAIJING Magazine has firmly established
+    itself as a news authority and leading voice for business and financial
+    issues in China.
+
+    CAIJING Magazine closely tracks the most important aspects of China's
+    economic reforms, developments and policy changes, as well as major events
+    in the capital markets. It also offers a broad international perspective
+    through first-hand reporting on international political and economic
+    issues.
+
+    CAIJING Magazine is China's most widely read business and finance magazine,
+    with a circulation of 225,000 per issue. It boasts top-level readers from
+    government, business and academic circles.'''
     language = 'zh'
-    category = 'news, China'
     encoding = 'UTF-8'
+    publisher = 'Caijing Magazine'
+    publication_type = 'magazine'
+    category = 'news, Business, China'
     timefmt = ' [%a, %d %b, %Y]'
     needs_subscription = True
 
-    remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
-        'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
-        'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
-        dict(name=['script', 'noscript', 'style'])]
+    remove_tags = [dict(attrs={'class':['head_nav', 'mcont_logo', 'header',
+        'bottom','footer', 'magazine_ipad','cjartShare', 'ar_about',
+        'main_rt', 'mcont_nav', 'new']}),
+        dict(attrs={'id': ['articlePl']}),
+        dict(name=['script', 'noscript', 'style'])]
     no_stylesheets = True
    remove_javascript = True
     current_issue_url = ""
     current_issue_cover = ""
 
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         if self.username is not None and self.password is not None:
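The rewritten remove_tags list above drops page chrome by CSS class, one container by id ('articlePl'), and all script/noscript/style tags. Below is a minimal standalone sketch of how such dict filters behave, using bs4 in place of calibre's bundled BeautifulSoup; the HTML snippet is invented for illustration and this is not calibre's own cleanup code.

# Standalone sketch: each remove_tags dict can be passed straight to
# BeautifulSoup, which then drops every matching element from the tree.
from bs4 import BeautifulSoup

remove_tags = [
    dict(attrs={'class': ['head_nav', 'mcont_logo', 'header', 'bottom',
                          'footer', 'magazine_ipad', 'cjartShare', 'ar_about',
                          'main_rt', 'mcont_nav', 'new']}),
    dict(attrs={'id': ['articlePl']}),
    dict(name=['script', 'noscript', 'style']),
]

# Invented markup standing in for a Caijing article page.
html = ('<div class="head_nav">menu</div>'
        '<div id="articlePl">related links</div>'
        '<script>track()</script>'
        '<p>article text</p>')
soup = BeautifulSoup(html, 'html.parser')

for spec in remove_tags:
    for tag in soup.find_all(spec.get('name'), attrs=spec.get('attrs', {})):
        tag.decompose()

print(soup)  # only <p>article text</p> is left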
@@ -44,31 +53,34 @@ class Caijing(BasicNewsRecipe):
         return br
 
     def parse_index(self):
-        articles = []
-        soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
-        div = soup0.find('div', attrs={'class':'fmcon'})
-        link = div.find('a', href=True)
-        current_issue_url = link['href']
+        soup_start = self.index_to_soup('http://magazine.caijing.com.cn/')
+        jumpurl = soup_start.find('script').contents[0].split()
+        for line in jumpurl:
+            if 'http' in line.lower():
+                issuesurl = line.split('"')[1]
+                break
+
+        soup_issues = self.index_to_soup(issuesurl)
+        # find the latest issue
+        div = soup_issues.find('div', attrs={'class':'fmcon'})
+        current_issue_url = div.find('a', href=True)['href']
 
         soup = self.index_to_soup(current_issue_url)
-
-        for div_cover in soup.findAll('img', {'src' : re.compile('.')}):
-            if re.search('\d{4}-\d{2}-\d{2}', div_cover['src']):
-                self.current_issue_cover = div_cover['src']
+        coverimg = soup.find('div', {'class': 'zzfm_img'})
+        self.current_issue_cover = coverimg.find('img')['src']
 
         feeds = []
-        for section in soup.findAll('div', attrs={'class':'cebd'}):
-            section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
+        for section in soup.findAll('div',
+                attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
+            section_title = self.tag_to_string(section.find('div',
+                attrs={'class':re.compile(r'(lmnav_bt|zzlm_bt)1?$')}))
+            self.log('Found section:', section_title)
             articles = []
-            for post in section.findAll('a', href=True):
-                if re.search('\d{4}-\d{2}-\d{2}', post['href']):
-                    date = re.search('\d{4}-\d{2}-\d{2}', post['href']).group(0)
-                    id = re.search('\d{9}', post['href']).group(0)
-                    url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', post['href'])
-                    url = url + id + '&time=' + date + '&cl=106&page=all'
-
-                    title = self.tag_to_string(post)
-                    articles.append({'title':title, 'url':url, 'date':date})
+            for post in section.findAll('div',
+                    attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
+                title = self.tag_to_string(post)
+                url = post.find('a')['href']
+                articles.append({'title': title, 'url': url, 'date': None})
 
             if articles:
                 feeds.append((section_title, articles))
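The new parse_index no longer hard-codes a yearly index page; it loads http://magazine.caijing.com.cn/ and follows the JavaScript redirect emitted in a script tag to reach the issues index. Below is a minimal sketch of that extraction step, with an invented redirect snippet and target path (the real page's markup may differ) and bs4 standing in for calibre's bundled BeautifulSoup.

# Sketch of the jump-URL extraction; the script body and the target URL
# below are made up for illustration.
from bs4 import BeautifulSoup

html = '''<html><head><script>
window.location.href="http://magazine.caijing.com.cn/cjindex/";
</script></head><body></body></html>'''

soup_start = BeautifulSoup(html, 'html.parser')
jumpurl = soup_start.find('script').contents[0].split()

issuesurl = None
for line in jumpurl:
    if 'http' in line.lower():
        # the redirect target is the first double-quoted field of the token
        issuesurl = line.split('"')[1]
        break

print(issuesurl)  # http://magazine.caijing.com.cn/cjindex/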
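The feeds loop now keys off class-name patterns rather than fixed class names, so the same code walks both the cover-story block and the regular sections. The sketch below shows how those regexes match on invented markup; the pairing of class names is assumed for illustration, not taken from the live site.

# Invented markup; class names follow the patterns used in the recipe.
import re
from bs4 import BeautifulSoup

html = '''
<div class="fmwz_ml">
  <div class="lmnav_bt">Cover Story</div>
  <div class="fmwz_bt"><a href="http://example.invalid/a1">Article one</a></div>
</div>
<div class="zzlm_nr2">
  <div class="zzlm_bt1">Economy</div>
  <div class="zzlm_nr_bt"><a href="http://example.invalid/a2">Article two</a></div>
</div>
'''
soup = BeautifulSoup(html, 'html.parser')

# 'fmwz_ml' and 'zzlm_nr2' match; the anchored $ keeps 'zzlm_nr_bt' out.
for section in soup.find_all('div', attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
    title_div = section.find('div', attrs={'class': re.compile(r'(lmnav_bt|zzlm_bt)1?$')})
    section_title = title_div.get_text(strip=True)
    for post in section.find_all('div', attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
        print(section_title, '->', post.a['href'])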