# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals

try:
    from urllib.parse import urljoin
except ImportError:  # Python 2 fallback
    from urlparse import urljoin

from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html

__license__ = 'GPL v3'


class Nfcmag(BasicNewsRecipe):
    """Recipe that downloads the latest issue listed on www.nfcmag.com.

    ``parse_index`` locates the most recent issue from the magazine index
    page, remembers its URL and cover image, and returns one feed per
    section found on the issue page.
    """

    __author__ = '2014, Chen Wei '
    title = 'Nan Feng Chuang / South Reviews Magazine'
    # Kept as adjacent literals so the text stays readable in source while
    # the resulting string is unchanged.
    description = (
        ' South Reviews Magazine, established in 1985, is a Guangzhou-based '
        'political and economic biweekly. South Reviews enjoys a reputation of '
        'being fair and objective, with graceful narration, insightful '
        'expression among its readers, mostly government officials, economic '
        'leaders and intellectuals. It has been praised as “the No.1 '
        'Political& Economical Magazine in China”. The US magazine Time '
        'described South Reviews as "a highbrow news magazine". Other '
        'international media organizations such as BBC and NHK have conducted '
        'tracking shots of South Reviews journalists, to record their unique '
        'value special position in China’s media industry. Harvard-Yenching '
        "Library, Stanford University's East Asia Library and UC Berkeley "
        'Library have collections of the magazine since its first issue, '
        'taking them as an important source to understand '
        "China's economic and social reform. Since 2008, South Reviews has "
        'been committed to transforming into a research-based media '
        'organization. Most of its editors, reporters and contributors have '
        'remarkably strong academic backgrounds, coming from Peking '
        'University, Tsinghua University, London School of Economics and '
        'Political Science, the Chinese University of Hong Kong, Renmin '
        'University of China, and other well-known institutions. The magazine '
        'has established research divisions, including the State Policy '
        'Research Center and the Brand Promotion Research Center, working in '
        'cooperation with well-known academic institutions and providing '
        'valuable research reports for governments and companies. \n'
    )
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False

    remove_tags = [
        dict(attrs={'class': ['side-left', 'side-right', 'breadcrumbs',
                              'score', 'weboNav']}),
        dict(attrs={'id': ['header', 'footer']}),
        dict(name=['script', 'noscript', 'style']),
    ]
    no_stylesheets = True
    remove_javascript = True

    # Filled in by parse_index(); empty until it has run.
    current_issue_url = ""
    current_issue_cover = ""

    # Site root used to resolve relative links, and the page listing issues.
    _BASE_URL = 'http://www.nfcmag.com/'
    _INDEX_URL = 'http://www.nfcmag.com/magazine'

    # XPath selectors. 'lastest-magazine' (sic) is the class name the
    # selectors match on; NOTE(review): presumably mirrors the site's markup.
    _LATEST_BOX_XPATH = (
        "//div[contains(@class, 'lastest-magazine') "
        "and contains(@class, 'comBox')]"
    )
    _ISSUE_LINK_XPATH = (
        _LATEST_BOX_XPATH + " //a[@href and not(@id) and not(child::img)] "
    )
    _COVER_IMG_XPATH = _LATEST_BOX_XPATH + " //img[@*] "
    _SECTION_XPATH = (
        "//div[contains(@class, 'article-box') "
        "and contains(@class, 'comBox')] "
    )

    def _absolute_url(self, url):
        """Return *url* as an absolute URL, resolving it against the site root.

        URLs that already carry an ``http://`` or ``https://`` scheme are
        returned untouched; anything else is joined onto ``_BASE_URL``.
        """
        if url.lower().startswith(('http://', 'https://')):
            return url
        return urljoin(self._BASE_URL, url)

    def _print_view_url(self, url):
        """Map an article URL to its print view (``*.html`` -> ``*-s.html``).

        URLs that do not end in ``.html`` are returned unchanged rather than
        being truncated blindly.
        """
        suffix = '.html'
        if url.lower().endswith(suffix):
            return url[:-len(suffix)] + '-s.html'
        return url

    def _find_cover_url(self, issue_tree):
        """Return the absolute URL of the issue's cover image, or ``''``.

        Picks the first image in the latest-magazine box that has a ``src``
        attribute. A missing cover is logged but does not abort the download.
        """
        for img in issue_tree.xpath(self._COVER_IMG_XPATH):
            src = img.get('src')
            if src:
                return self._absolute_url(src)
        self.log.warn('No cover image found for the current issue')
        return ''

    def _section_articles(self, section):
        """Collect the article dicts for every ``<h5>`` entry in *section*."""
        articles = []
        for entry in section.xpath('.//h5'):
            links = entry.xpath('.//a')
            href = links[0].get('href') if links else None
            if not href:
                # Entry has no usable link; nothing to download for it.
                continue
            articles.append({
                'title': entry.text_content(),
                'url': self._print_view_url(self._absolute_url(href)),
                'date': None,
            })
        return articles

    def parse_index(self):
        """Build the list of feeds for the most recent issue.

        Fetches the magazine index page, follows the first link in the
        latest-magazine box to the issue page, records the issue URL and
        cover image URL on the instance, and gathers the articles of each
        section on the issue page.

        Returns:
            A list of ``(section_title, articles)`` tuples, where *articles*
            is a non-empty list of dicts with ``title``, ``url`` and ``date``
            keys. Sections without articles are omitted.

        Raises:
            ValueError: if no issue link can be found on the index page.
        """
        index_raw = self.index_to_soup(self._INDEX_URL, raw=True)
        issue_links = html.fromstring(index_raw).xpath(self._ISSUE_LINK_XPATH)
        if not issue_links:
            raise ValueError(
                'No issue link found on ' + self._INDEX_URL)
        # The first matching link is taken as the current issue.
        issue_url = self._absolute_url(issue_links[0].get('href'))
        self.current_issue_url = issue_url

        issue_raw = self.index_to_soup(issue_url, raw=True)
        issue_tree = html.fromstring(issue_raw)
        self.current_issue_cover = self._find_cover_url(issue_tree)

        feeds = []
        for section in issue_tree.xpath(self._SECTION_XPATH):
            headings = section.xpath('.//h4')
            if not headings:
                self.log.warn('Skipping section without a heading')
                continue
            sec_title = headings[0].text_content()
            self.log('Found section:', sec_title)
            articles = self._section_articles(section)
            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image URL recorded by ``parse_index``.

        The value is an empty string if ``parse_index`` has not run or found
        no cover image.
        """
        return self.current_issue_cover