calibre/recipes/nanfengchuang.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

107 lines
4.4 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html
__license__ = 'GPL v3'
class Nfcmag(BasicNewsRecipe):
    """Recipe for the current issue of Nan Feng Chuang / South Reviews.

    ``parse_index`` starts from the magazine landing page, follows the link
    to the latest issue, records that issue's cover image and returns one
    feed per section box found on the issue page.
    """

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Nan Feng Chuang / South Reviews Magazine'
    description = '''
South Reviews Magazine, established in 1985, is a Guangzhou-based political and
economic biweekly. South Reviews enjoys a reputation of being fair and objective, with graceful
narration, insightful expression among its readers, mostly government
officials, economic leaders and intellectuals. It has been praised as “the No.1
Political& Economical Magazine in China”.
The US magazine Time described South Reviews as "a highbrow news magazine".
Other international media organizations such as BBC and NHK have conducted
tracking shots of South Reviews journalists, to record their unique value
special position in Chinas media industry. Harvard-Yenching Library, Stanford
University's East Asia Library and UC Berkeley Library have collections of the
magazine since its first issue, taking them as an important source to
understand China's economic and social reform.
Since 2008, South Reviews has been committed to transforming into a
research-based media organization. Most of its editors, reporters and
contributors have remarkably strong academic backgrounds, coming from Peking
University, Tsinghua University, London School of Economics and Political
Science, the Chinese University of Hong Kong, Renmin University of China, and
other well-known institutions. The magazine has established research divisions,
including the State Policy Research Center and the Brand Promotion Research
Center, working in cooperation with well-known academic institutions and
providing valuable research reports for governments and companies.
'''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False
    # Elements selected by class, by id and by tag name that should be
    # removed from the downloaded pages.
    remove_tags = [dict(attrs={'class': ['side-left', 'side-right',
                                         'breadcrumbs', 'score', 'weboNav']}),
                   dict(attrs={'id': ['header', 'footer']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    # Both are filled in by parse_index(); they stay empty until it has run.
    current_issue_url = ""
    current_issue_cover = ""

    @staticmethod
    def _absolute_url(baseurl, url):
        """Return ``url`` as an absolute URL, resolving it against ``baseurl``.

        URLs that already carry an ``http``/``https`` scheme are returned
        unchanged, protocol-relative URLs (``//host/path``) get ``http:``
        prepended, and everything else is treated as relative to the site
        root ``baseurl`` (joined with exactly one slash).
        """
        url = url.strip()
        lowered = url.lower()
        if lowered.startswith(('http://', 'https://')):
            return url
        if url.startswith('//'):
            return 'http:' + url
        return baseurl.rstrip('/') + '/' + url.lstrip('/')

    def parse_index(self):
        """Build the feed list for the latest issue.

        Returns a list of ``(section_title, articles)`` tuples, where
        ``articles`` is a list of dicts with ``title``, ``url`` and ``date``
        keys. Sections that yield no articles are omitted.

        Raises ``ValueError`` when the landing page contains no link to the
        latest issue, since nothing can be downloaded in that case.
        """
        baseurl = 'http://www.nfcmag.com/'
        raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True)
        soup_start = html.fromstring(raw)
        els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine')
                                  and contains(@class, 'comBox')]
                                  //a[@href and not(@id) and not(child::img)]
                               """)
        if not els:
            # Fail with an explicit message instead of an unbound-variable
            # error further down.
            raise ValueError(
                'Could not find a link to the latest issue on '
                'http://www.nfcmag.com/magazine')
        # Only the first matching link is used as the issue page.
        issueurl = self._absolute_url(baseurl, els[0].get('href'))
        self.current_issue_url = issueurl

        raw = self.index_to_soup(issueurl, raw=True)
        soup_issue = html.fromstring(raw)
        coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine')
                                       and contains(@class, 'comBox')]
                                       //img[@*] """)
        # The XPath accepts any <img> with at least one attribute, so pick
        # the first one that actually has a src. A missing cover is not a
        # reason to abort the whole download.
        imgurl = None
        for img in coverimg:
            candidate = img.get('src')
            if candidate:
                imgurl = candidate
                break
        if imgurl:
            self.current_issue_cover = self._absolute_url(baseurl, imgurl)
        else:
            self.log('No cover image found for issue:', issueurl)

        feeds = []
        sections = soup_issue.xpath("""//div[contains(@class, 'article-box')
                                       and contains(@class, 'comBox')] """)
        for sec in sections:
            headings = sec.xpath('.//h4')
            if not headings:
                # A box without an <h4> has no section title to file its
                # articles under.
                self.log('Skipping section box without a heading')
                continue
            sec_title = headings[0].text_content().strip()
            self.log('Found section:', sec_title)

            articles = []
            for page in sec.xpath('.//h5'):
                links = page.xpath('.//a')
                href = links[0].get('href') if links else None
                if not href:
                    # Heading without a usable link: nothing to download.
                    continue
                url = self._absolute_url(baseurl, href)
                if url.lower().endswith('.html'):
                    url = url[:-5] + '-s.html'  # to print view
                title = page.text_content().strip()
                articles.append({'title': title, 'url': url, 'date': None})

            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image URL recorded by ``parse_index``.

        The value is an empty string if ``parse_index`` has not run yet or
        did not find a cover image.
        """
        return self.current_issue_cover