From 3dbc25276711a5966c6071d3bede9bba318b3450 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 1 Jan 2014 19:06:19 +0530 Subject: [PATCH] Nan Feng Chuang by Chen Wei --- recipes/nanfengchuang.recipe | 108 +++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 recipes/nanfengchuang.recipe diff --git a/recipes/nanfengchuang.recipe b/recipes/nanfengchuang.recipe new file mode 100644 index 0000000000..dce397986c --- /dev/null +++ b/recipes/nanfengchuang.recipe @@ -0,0 +1,108 @@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +from __future__ import unicode_literals +from calibre.web.feeds.recipes import BasicNewsRecipe +from lxml import html + +__license__ = 'GPL v3' + + +class Nfcmag(BasicNewsRecipe): + + __author__ = '2014, Chen Wei ' + title = 'Nan Feng Chuang / South Reviews Magazine' + description = ''' +South Reviews Magazine, established in 1985, is a Guangzhou-based political and +economic biweekly. South Reviews enjoys a reputation of being fair and objective, with graceful +narration, insightful expression among its readers, mostly government +officials, economic leaders and intellectuals. It has been praised as “the No.1 +Political& Economical Magazine in China”. + +The US magazine Time described South Reviews as "a highbrow news magazine". +Other international media organizations such as BBC and NHK have conducted +tracking shots of South Reviews journalists, to record their unique value +special position in China’s media industry. Harvard-Yenching Library, Stanford +University's East Asia Library and UC Berkeley Library have collections of the +magazine since its first issue, taking them as an important source to +understand China's economic and social reform. + +Since 2008, South Reviews has been committed to transforming into a +research-based media organization. Most of its editors, reporters and +contributors have remarkably strong academic backgrounds, coming from Peking +University, Tsinghua University, London School of Economics and Political +Science, the Chinese University of Hong Kong, Renmin University of China, and +other well-known institutions. The magazine has established research divisions, +including the State Policy Research Center and the Brand Promotion Research +Center, working in cooperation with well-known academic institutions and +providing valuable research reports for governments and companies. + +''' + language = 'zh' + encoding = 'UTF-8' + publisher = 'South Reviews Magazine' + publication_type = 'magazine' + category = 'news, Business, China' + timefmt = ' [%a, %d %b, %Y]' + needs_subscription = False + + remove_tags = [dict(attrs={'class':['side-left', 'side-right', + 'breadcrumbs', 'score', 'weboNav']}), + dict(attrs={'id': ['header', 'footer']}), + dict(name=['script', 'noscript', 'style'])] + no_stylesheets = True + remove_javascript = True + current_issue_url = "" + current_issue_cover = "" + + def parse_index(self): + + baseurl = 'http://www.nfcmag.com/' + raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True) + soup_start = html.fromstring(raw) + + #pageDoc = html.parse(sectionUrl) + els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine') + and contains(@class, 'comBox')] + //a[@href and not(@id) and not(child::img)] + """) + for x in els: + issueurl = x.get('href') + if not issueurl.lower().startswith('http://'): + issueurl = baseurl + issueurl + break + + raw = self.index_to_soup(issueurl, raw=True) + soup_issue = html.fromstring(raw) + + coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine') + and contains(@class, 'comBox')] + //img[@*] """) + imgurl = coverimg[0].get('src') + if not imgurl.lower().startswith('http://'): + imgurl = baseurl + imgurl + self.current_issue_cover = imgurl + feeds = [] + + sections = soup_issue.xpath("""//div[contains(@class, 'article-box') + and contains(@class, 'comBox')] """) + for sec in sections: + pages = sec.xpath('.//h5') + sec_title = sec.xpath('.//h4')[0].text_content() + self.log('Found section:', sec_title) + articles = [] + for x in pages: + url = x.xpath('.//a')[0].get('href') + if not url.lower().startswith('http://'): + url = baseurl + url + url = url[:-5] + '-s.html' # to print view + + title = x.text_content() + + articles.append({'title': title, 'url': url, 'date': None}) + + if articles: + feeds.append((sec_title, articles)) + return feeds + + def get_cover_url(self): + return self.current_issue_cover +