Nan Feng Chuang by Chen Wei
This commit is contained in:
parent e88ec0a222
commit 3dbc252767
recipes/nanfengchuang.recipe  (new file, 108 lines)
@@ -0,0 +1,108 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals

from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html

__license__ = 'GPL v3'

class Nfcmag(BasicNewsRecipe):

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Nan Feng Chuang / South Reviews Magazine'
    description = '''
    South Reviews Magazine, established in 1985, is a Guangzhou-based
    political and economic biweekly. Among its readers, mostly government
    officials, business leaders and intellectuals, South Reviews enjoys a
    reputation for fairness, objectivity, graceful narration and insightful
    commentary. It has been praised as "the No. 1 political and economic
    magazine in China".

    The US magazine Time described South Reviews as "a highbrow news
    magazine". Other international media organizations such as the BBC and
    NHK have filmed South Reviews journalists at work, recording their
    unique value and special position in China's media industry. The
    Harvard-Yenching Library, Stanford University's East Asia Library and
    the UC Berkeley Library have collected the magazine since its first
    issue, treating it as an important source for understanding China's
    economic and social reform.

    Since 2008, South Reviews has been committed to transforming itself
    into a research-based media organization. Most of its editors,
    reporters and contributors have remarkably strong academic backgrounds,
    coming from Peking University, Tsinghua University, the London School
    of Economics and Political Science, the Chinese University of Hong
    Kong, Renmin University of China, and other well-known institutions.
    The magazine has established research divisions, including the State
    Policy Research Center and the Brand Promotion Research Center, which
    cooperate with well-known academic institutions and provide research
    reports for governments and companies.
    '''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False

    # strip site chrome: sidebars, breadcrumbs, rating widget, header/footer,
    # and any scripts or inline styles
    remove_tags = [dict(attrs={'class': ['side-left', 'side-right',
                                         'breadcrumbs', 'score', 'weboNav']}),
                   dict(attrs={'id': ['header', 'footer']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    current_issue_url = ""
    current_issue_cover = ""
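
    # calibre calls parse_index for non-RSS sources; it must return a list of
    # (section title, list of article dicts) tuples, which is what the loop
    # below builds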
    def parse_index(self):
        baseurl = 'http://www.nfcmag.com/'
        raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True)
        soup_start = html.fromstring(raw)

        # the 'lastest-magazine' box (sic, the site's own class name) lists
        # the current issue; take its first plain link (no id, no image child)
        els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine')
                                        and contains(@class, 'comBox')]
                                  //a[@href and not(@id) and not(child::img)]
                               """)
        for x in els:
            issueurl = x.get('href')
            if not issueurl.lower().startswith('http://'):
                issueurl = baseurl + issueurl
            break

        raw = self.index_to_soup(issueurl, raw=True)
        soup_issue = html.fromstring(raw)
        # the cover image sits in the same box on the issue page
        coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine')
                                             and contains(@class, 'comBox')]
                                       //img[@*]""")
        imgurl = coverimg[0].get('src')
        if not imgurl.lower().startswith('http://'):
            imgurl = baseurl + imgurl
        self.current_issue_cover = imgurl

        feeds = []
        # each article-box div is one section: an h4 heading followed by one
        # h5 per article
        sections = soup_issue.xpath("""//div[contains(@class, 'article-box')
                                             and contains(@class, 'comBox')]""")
        for sec in sections:
            pages = sec.xpath('.//h5')
            sec_title = sec.xpath('.//h4')[0].text_content()
            self.log('Found section:', sec_title)
            articles = []
            for x in pages:
                url = x.xpath('.//a')[0].get('href')
                if not url.lower().startswith('http://'):
                    url = baseurl + url
                # swap the '.html' suffix for '-s.html' to get the print view
                url = url[:-5] + '-s.html'
                title = x.text_content()
                articles.append({'title': title, 'url': url, 'date': None})

            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        return self.current_issue_cover
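
A quick way to exercise the recipe from a calibre checkout is the standard recipe test workflow (the output filename here is arbitrary):

    ebook-convert recipes/nanfengchuang.recipe nfcmag.epub --test

With --test, ebook-convert downloads only a couple of articles from the first couple of sections, which is enough to verify parse_index and the print-view URL rewriting without fetching the whole issue.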