Nan Feng Chuang by Chen Wei
This commit is contained in:
parent e88ec0a222
commit 3dbc252767
recipes/nanfengchuang.recipe  (new file, 108 lines)
@@ -0,0 +1,108 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals

from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html

__license__ = 'GPL v3'

class Nfcmag(BasicNewsRecipe):

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Nan Feng Chuang / South Reviews Magazine'
    description = '''
    South Reviews Magazine, established in 1985, is a Guangzhou-based
    political and economic biweekly. Among its readers, mostly government
    officials, business leaders and intellectuals, South Reviews enjoys a
    reputation for fairness, objectivity, graceful narration and insightful
    commentary. It has been praised as "the No. 1 political and economic
    magazine in China".

    The US magazine Time described South Reviews as "a highbrow news
    magazine". Other international media organizations such as the BBC and
    NHK have filmed South Reviews journalists at work, recording their
    unique value and special position in China's media industry. The
    Harvard-Yenching Library, Stanford University's East Asia Library and
    the UC Berkeley Library have collected the magazine since its first
    issue, treating it as an important source for understanding China's
    economic and social reform.

    Since 2008, South Reviews has been committed to transforming itself
    into a research-based media organization. Most of its editors,
    reporters and contributors have remarkably strong academic backgrounds,
    coming from Peking University, Tsinghua University, the London School
    of Economics and Political Science, the Chinese University of Hong
    Kong, Renmin University of China, and other well-known institutions.
    The magazine has established research divisions, including the State
    Policy Research Center and the Brand Promotion Research Center, which
    cooperate with well-known academic institutions and provide research
    reports for governments and companies.
    '''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False

    # strip site chrome: sidebars, breadcrumbs, rating widget, header/footer,
    # and any scripts or inline styles
    remove_tags = [dict(attrs={'class': ['side-left', 'side-right',
                                         'breadcrumbs', 'score', 'weboNav']}),
                   dict(attrs={'id': ['header', 'footer']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    current_issue_url = ""
    current_issue_cover = ""
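
    # calibre calls parse_index for non-RSS sources; it must return a list of
    # (section title, list of article dicts) tuples, which is what the loop
    # below builds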
    def parse_index(self):
        baseurl = 'http://www.nfcmag.com/'
        raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True)
        soup_start = html.fromstring(raw)

        # the 'lastest-magazine' box (sic, the site's own class name) lists
        # the current issue; take its first plain link (no id, no image child)
        els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine')
                                        and contains(@class, 'comBox')]
                                  //a[@href and not(@id) and not(child::img)]
                               """)
        for x in els:
            issueurl = x.get('href')
            if not issueurl.lower().startswith('http://'):
                issueurl = baseurl + issueurl
            break

        raw = self.index_to_soup(issueurl, raw=True)
        soup_issue = html.fromstring(raw)
        # the cover image sits in the same box on the issue page
        coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine')
                                             and contains(@class, 'comBox')]
                                       //img[@*]""")
        imgurl = coverimg[0].get('src')
        if not imgurl.lower().startswith('http://'):
            imgurl = baseurl + imgurl
        self.current_issue_cover = imgurl

        feeds = []
        # each article-box div is one section: an h4 heading followed by one
        # h5 per article
        sections = soup_issue.xpath("""//div[contains(@class, 'article-box')
                                             and contains(@class, 'comBox')]""")
        for sec in sections:
            pages = sec.xpath('.//h5')
            sec_title = sec.xpath('.//h4')[0].text_content()
            self.log('Found section:', sec_title)
            articles = []
            for x in pages:
                url = x.xpath('.//a')[0].get('href')
                if not url.lower().startswith('http://'):
                    url = baseurl + url
                # swap the '.html' suffix for '-s.html' to get the print view
                url = url[:-5] + '-s.html'
                title = x.text_content()
                articles.append({'title': title, 'url': url, 'date': None})

            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        return self.current_issue_cover
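
A quick way to exercise the recipe from a calibre checkout is the standard recipe test workflow (the output filename here is arbitrary):

    ebook-convert recipes/nanfengchuang.recipe nfcmag.epub --test

With --test, ebook-convert downloads only a couple of articles from the first couple of sections, which is enough to verify parse_index and the print-view URL rewriting without fetching the whole issue.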