Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

Commit 47fbf3b885: Merge branch 'master' of https://github.com/jony0008/calibre
@@ -1,21 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class BBCArabic(BasicNewsRecipe):
    title = u'BBC Arabic Middle East'
    oldest_article = 7
    max_articles_per_feed = 100
    extra_css = 'body { text-align: right; direction:rtl; } '
    auto_cleanup = True
    language = 'ar'
    __author__ = 'logophile777'
    remove_tags = [
        {'class': ['emp-alt-handheld', 'emp-noflash',
                   'emp-flashlink', 'emp-alt-screen']}
    ]

    feeds = [(u'BBC Arabic Middle East',
              u'http://www.bbc.co.uk/arabic/middleeast/index.xml')]

    def print_version(self, url):
        return url + '?print=1'
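A minimal sketch of the hook contract this recipe relies on: calibre hands each article URL to print_version and downloads whatever URL it returns, so appending the site's print query string yields the cleaner print-friendly page. A hypothetical standalone equivalent:

    def bbc_print_url(url):
        # same transformation as BBCArabic.print_version above
        return url + '?print=1'

    # bbc_print_url('http://www.bbc.co.uk/arabic/middleeast/story')
    # -> 'http://www.bbc.co.uk/arabic/middleeast/story?print=1'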
@@ -1,46 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'BBC Chinese'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
        (u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
        (u'\u56fd\u9645\u65b0\u95fb',
         u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
        (u'\u4e24\u5cb8\u4e09\u5730',
         u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
        (u'\u91d1\u878d\u8d22\u7ecf',
         u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
        (u'\u7f51\u4e0a\u4e92\u52a8',
         u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
        (u'\u97f3\u89c6\u56fe\u7247',
         u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
        (u'\u5206\u6790\u8bc4\u8bba',
         u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
    ]
    extra_css = '''
        @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
        body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
        h1 {font-family: 'DroidFont', serif;}\n
        .articledescription {font-family: 'DroidFont', serif;}
    '''
    __author__ = 'rty'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'British Broadcasting Corporation'
    description = 'BBC news in Chinese'
    category = 'News, Chinese'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
    keep_only_tags = [
        dict(name='h1'),
        dict(name='p', attrs={'class': ['primary-topic', 'summary']}),
        dict(name='div', attrs={'class': ['bodytext', 'datestamp']}),
    ]
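The keep_only_tags list above is calibre's declarative content filter: each dict is matched against the parsed page much like a BeautifulSoup find_all call on name and attrs. A rough standalone sketch of that matching, using the bs4 package (an assumption; calibre's real matcher lives inside BasicNewsRecipe):

    from bs4 import BeautifulSoup

    html = '<h1>T</h1><p class="summary">lede</p><div class="ad">junk</div>'
    soup = BeautifulSoup(html, 'html.parser')
    keep = [dict(name='h1'),
            dict(name='p', attrs={'class': ['primary-topic', 'summary']})]
    kept = []
    for f in keep:
        kept.extend(soup.find_all(f.get('name'), attrs=f.get('attrs', {})))
    print([t.name for t in kept])  # -> ['h1', 'p']; the div would be discarded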
@@ -1,44 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Alayn Gortazar <zutoin at gmail dot com>'
'''
www.berria.info
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Berria(BasicNewsRecipe):
    title = 'Berria'
    __author__ = 'Alayn Gortazar'
    description = 'Euskal Herriko euskarazko egunkaria'
    publisher = 'Berria'
    category = 'news, politics, sports, Basque Country'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'eu'
    remove_empty_feeds = True
    masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'

    keep_only_tags = [
        dict(id='goiburua'),
        dict(name='div', attrs={'class': ['ber_ikus']}),
        dict(name='section', attrs={'class': 'ber_ikus'})
    ]
    remove_tags = [
        dict(name='a', attrs={'class': 'iruzkinak'}),
        dict(name='div', attrs={'class': 'laguntzaileak'})
    ]

    extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}'  # noqa

    feeds = [
        (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'),
        (u'Iritzia', u'http://berria.info/rss/iritzia.xml'),
        (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'),
        (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'),
        (u'Mundua', u'http://berria.info/rss/mundua.xml'),
        (u'Kirola', u'http://berria.info/rss/kirola.xml'),
        (u'Plaza', u'http://berria.info/rss/plaza.xml')
    ]
@@ -1,20 +0,0 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe


class BlogdaCidadania(BasicNewsRecipe):
    title = 'Blog da Cidadania'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do Blog da Cidadania'
    oldest_article = 7
    max_articles_per_feed = 50
    encoding = 'utf8'
    publisher = 'Eduardo Guimaraes'
    category = 'politics, Brazil'
    language = 'pt_BR'
    publication_type = 'politics portal'

    feeds = [(u'Blog da Cidadania', u'http://www.blogcidadania.com.br/feed/')]

    reverse_article_order = True
@@ -1,90 +0,0 @@
import re

from calibre.web.feeds.recipes import BasicNewsRecipe

__license__ = 'GPL v3'


class Caijing(BasicNewsRecipe):

    '''based on the recipe written by Eric Chen in 2011'''

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Caijing Magazine'
    description = '''
    Founded in 1998, the fortnightly CAIJING Magazine has firmly established
    itself as a news authority and leading voice for business and financial
    issues in China.

    CAIJING Magazine closely tracks the most important aspects of China's
    economic reforms, developments and policy changes, as well as major events
    in the capital markets. It also offers a broad international perspective
    through first-hand reporting on international political and economic
    issues.

    CAIJING Magazine is China's most widely read business and finance magazine,
    with a circulation of 225,000 per issue. It boasts top-level readers from
    government, business and academic circles.'''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'Caijing Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = True

    remove_tags = [dict(attrs={'class': ['head_nav', 'mcont_logo', 'header',
                                         'bottom', 'footer', 'magazine_ipad', 'cjartShare', 'ar_about',
                                         'main_rt', 'mcont_nav', 'new']}),
                   dict(attrs={'id': ['articlePl']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    current_issue_url = ""
    current_issue_cover = ""

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        soup_start = self.index_to_soup('http://magazine.caijing.com.cn/')
        jumpurl = soup_start.find('script').contents[0].split()
        for line in jumpurl:
            if 'http' in line.lower():
                issuesurl = line.split('"')[1]
                break

        soup_issues = self.index_to_soup(issuesurl)
        # find the latest issue
        div = soup_issues.find('div', attrs={'class': 'fmcon'})
        current_issue_url = div.find('a', href=True)['href']

        soup = self.index_to_soup(current_issue_url)
        coverimg = soup.find('div', {'class': 'zzfm_img'})
        self.current_issue_cover = coverimg.find('img')['src']

        feeds = []
        for section in soup.findAll('div',
                                    attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
            section_title = self.tag_to_string(section.find('div',
                                               attrs={'class': re.compile(r'(lmnav_bt|zzlm_bt)1?$')}))
            self.log('Found section:', section_title)
            articles = []
            for post in section.findAll('div',
                                        attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
                title = self.tag_to_string(post)
                url = post.find('a')['href']
                articles.append({'title': title, 'url': url, 'date': None})

            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        return self.current_issue_cover
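parse_index above ultimately returns the plain structure calibre expects from any recipe that builds its own index: a list of (section_title, articles) pairs, each article a dict with at least 'title' and 'url'. A hand-built example of that shape (titles and URLs hypothetical):

    feeds = [
        ('Cover Story', [
            {'title': 'Example article', 'url': 'http://magazine.caijing.com.cn/example.html', 'date': None},
        ]),
        ('Economy', [
            {'title': 'Another example', 'url': 'http://magazine.caijing.com.cn/example-2.html', 'date': None},
        ]),
    ]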
@@ -1,16 +0,0 @@
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1311839910(BasicNewsRecipe):
    title = u'Caros Amigos'
    oldest_article = 20
    max_articles_per_feed = 100
    language = 'pt_BR'
    __author__ = 'Pablo Aldama'

    feeds = [(u'Caros Amigos',
              u'http://carosamigos.terra.com.br/index2/index.php?format=feed&type=rss')]
    keep_only_tags = [dict(name='div', attrs={'class': ['blog']}),
                      dict(name='div', attrs={'class': ['blogcontent']})]
    remove_tags = [dict(name='div', attrs={'class': 'addtoany'})]
@@ -1,29 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1380852962(BasicNewsRecipe):
    title = u'Carta Capital'
    __author__ = 'Erico Lisboa'
    language = 'pt_BR'
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    use_embedded_content = False

    feeds = [(u'Pol\xedtica',
              u'http://www.cartacapital.com.br/politica/politica/rss'),
             (u'Economia',
              u'http://www.cartacapital.com.br/economia/economia/atom.xml'),
             (u'Sociedade',
              u'http://www.cartacapital.com.br/sociedade/sociedade/atom.xml'),
             (u'Internacional',
              u'http://www.cartacapital.com.br/internacional/internacional/atom.xml'),
             (u'Tecnologia',
              u'http://www.cartacapital.com.br/tecnologia/tecnologia/atom.xml'),
             (u'Cultura',
              u'http://www.cartacapital.com.br/cultura/cultura/atom.xml'),
             (u'Sa\xfade', u'http://www.cartacapital.com.br/saude/saude/atom.xml'),
             (u'Educa\xe7\xe3o',
              u'http://www.cartacapital.com.br/educacao/educacao/atom.xml')]
@@ -1,69 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1277228948(BasicNewsRecipe):
    title = u'China Press USA'
    oldest_article = 7
    max_articles_per_feed = 100

    __author__ = 'rty'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'www.chinapressusa.com'
    description = 'Overseas Chinese Network Newspaper in the USA'
    category = 'News in Chinese, USA'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.chinapressusa.com/common/images/logo.gif'
    extra_css = '''
        @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
        body {
            margin-right: 8pt;
            font-family: 'DroidFont', serif;}
        h1 {font-family: 'DroidFont', serif, sans-serif}
        .show {font-family: 'DroidFont', serif, sans-serif}
    '''
    feeds = [
        (u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
        (u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
        (u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class': 'show'}),
    ]
    remove_tags = [
        # dict(name='table', attrs={'class':'xle'}),
        dict(name='div', attrs={'class': 'time'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'bank17'}),
        # dict(name='a', attrs={'class':'ab12'}),
    ]

    def append_page(self, soup, appendtag, position):
        # follow the "next page" link and splice its article body into the
        # current page, recursing until no pager is left
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class': 'show'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)

        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        if pager:
            pager.extract()
        return soup
@@ -1,73 +0,0 @@
#!/usr/bin/env python2

__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re

from calibre.web.feeds.news import BasicNewsRecipe


class TheCND(BasicNewsRecipe):

    title = 'CND'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables': True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align': 'right'}), dict(name='img', attrs={
        'src': 'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True

    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                          (re.compile('<table width.*?</table>',
                                      re.DOTALL), lambda m: ''),
                          ]

    def print_version(self, url):
        # replace everything before the first '=' with the matching print
        # endpoint, keeping the numeric article id that follows the '='
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        articles = {}

        for a in soup.findAll('a', attrs={'target': '_cnd'}):
            url = a['href']
            if url.find('article.php') < 0:
                continue
            if url.startswith('/'):
                url = 'http://cnd.org' + url
            title = self.tag_to_string(a)
            self.log('\tFound article: ', title, 'at', url)
            date = a.nextSibling
            if re.search('cm', date):
                continue
            if (date is not None) and len(date) > 2:
                if date not in articles:
                    articles[date] = []
                articles[date].append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('\t\tAppend to : ', date)

        mostCurrent = sorted(articles).pop()
        self.title = 'CND ' + mostCurrent

        feeds.append((self.title, articles[mostCurrent]))

        return feeds

    def populate_article_metadata(self, article, soup, first):
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))
        pass
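The re.sub in print_version above strips everything up to the first '=' and grafts the remaining article id onto the print endpoint. A worked run with a hypothetical story id:

    import re

    url = 'http://my.cnd.org/modules/news/article.php?storyid=12345'  # hypothetical id
    print(re.sub('^[^=]*', 'http://my.cnd.org/modules/news/print.php?storyid', url))
    # -> http://my.cnd.org/modules/news/print.php?storyid=12345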
@@ -1,74 +0,0 @@
#!/usr/bin/env python2

__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re

from calibre.web.feeds.news import BasicNewsRecipe


class TheCND(BasicNewsRecipe):

    title = 'CND Weekly'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables': True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align': 'right'}), dict(name='img', attrs={
        'src': 'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True

    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                          (re.compile('<table width.*?</table>',
                                      re.DOTALL), lambda m: ''),
                          ]

    def print_version(self, url):
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        articles = {}

        for a in soup.findAll('a', attrs={'target': '_cnd'}):
            url = a['href']
            if url.find('article.php') < 0:
                continue
            if url.startswith('/'):
                url = 'http://cnd.org' + url
            title = self.tag_to_string(a)
            date = a.nextSibling
            # keep only links whose trailing date text contains 'cm'
            # (the daily CND recipe above skips exactly those)
            if not re.search('cm', date):
                continue
            self.log('\tFound article: ', title, 'at', url, '@', date)
            if (date is not None) and len(date) > 2:
                if date not in articles:
                    articles[date] = []
                articles[date].append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('\t\tAppend to : ', date)

        sorted_articles = sorted(articles)
        while sorted_articles:
            mostCurrent = sorted_articles.pop()
            self.title = 'CND ' + mostCurrent
            feeds.append((self.title, articles[mostCurrent]))

        return feeds

    def populate_article_metadata(self, article, soup, first):
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))
        pass
@@ -1,76 +0,0 @@
#!/usr/bin/env python2

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
dnevniavaz.ba
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


def new_tag(soup, name, attrs=()):
    # prefer the BeautifulSoup 4 factory when the soup provides one, fall
    # back to constructing a Tag directly on older BeautifulSoup 3 trees
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


class DnevniAvaz(BasicNewsRecipe):
    title = 'Dnevni Avaz'
    __author__ = 'Darko Miletic'
    description = 'Latest news from Bosnia'
    publisher = 'Dnevni Avaz'
    category = 'news, politics, Bosnia and Herzegovina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
    lang = 'bs-BA'
    language = 'bs'

    direction = 'ltr'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'  # noqa

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id': [
        'fullarticle-title', 'fullarticle-leading', 'fullarticle-date', 'fullarticle-text', 'articleauthor']})]

    remove_tags = [dict(name=['object', 'link', 'base'])]

    feeds = [
        (u'Najnovije', u'http://www.dnevniavaz.ba/rss/novo'),
        (u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
    ]

    def replace_tagname(self, soup, tagname, tagid, newtagname):
        headtag = soup.find(tagname, attrs={'id': tagid})
        if headtag:
            headtag.name = newtagname
        return

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
        soup.head.insert(0, mlang)
        soup.head.insert(1, mcharset)
        self.replace_tagname(soup, 'div', 'fullarticle-title', 'h1')
        self.replace_tagname(soup, 'div', 'fullarticle-leading', 'h3')
        self.replace_tagname(soup, 'div', 'fullarticle-date', 'h5')
        return self.adeify_images(soup)
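The new_tag helper above bridges a BeautifulSoup API gap: BS4-style soups expose a soup.new_tag factory, while the older BS3 trees require instantiating Tag directly, so the same recipe code runs on either. A usage sketch, assuming a standalone bs4 soup behaves like the trees calibre passes in:

    from bs4 import BeautifulSoup  # assumption: bs4 is available standalone

    soup = BeautifulSoup('<html><head></head></html>', 'html.parser')
    meta = new_tag(soup, 'meta', [('http-equiv', 'Content-Language'), ('content', 'bs-BA')])
    soup.head.insert(0, meta)
    # soup.head now carries <meta content="bs-BA" http-equiv="Content-Language"/>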
@@ -1,73 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.cat
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


def new_tag(soup, name, attrs=()):
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


class ElPeriodico_cat(BasicNewsRecipe):
    title = 'El Periodico de Catalunya'
    __author__ = 'Jordi Balcells/Darko Miletic'
    description = 'Noticies des de Catalunya'
    publisher = 'elperiodico.cat'
    category = 'news, politics, Spain, Catalunya'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    delay = 1
    encoding = 'cp1252'
    language = 'ca'

    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'Portada', u'http://www.elperiodico.cat/ca/rss/rss_portada.xml'),
             (u'Internacional', u'http://www.elperiodico.cat/ca/rss/internacional/rss.xml'),
             (u'Societat', u'http://www.elperiodico.cat/ca/rss/societat/rss.xml'),
             (u'Ci\xe8ncia i tecnologia',
              u'http://www.elperiodico.cat/ca/rss/ciencia-i-tecnologia/rss.xml'),
             (u'Esports', u'http://www.elperiodico.cat/ca/rss/esports/rss.xml'),
             (u'Gent', u'http://www.elperiodico.cat/ca/rss/gent/rss.xml'),
             (u'Opini\xf3', u'http://www.elperiodico.cat/ca/rss/opinio/rss.xml'),
             (u'Pol\xedtica', u'http://www.elperiodico.cat/ca/rss/politica/rss.xml'),
             (u'Barcelona', u'http://www.elperiodico.cat/ca/rss/barcelona/rss.xml'),
             (u'Economia', u'http://www.elperiodico.cat/ca/rss/economia/rss.xml'),
             (u'Cultura i espectacles',
              u'http://www.elperiodico.cat/ca/rss/cultura-i-espectacles/rss.xml'),
             (u'Tele', u'http://www.elperiodico.cat/ca/rss/tele/rss.xml')]

    keep_only_tags = [dict(name='div', attrs={'class': 'titularnoticia'}),
                      dict(name='div', attrs={'class': 'noticia_completa'})]

    remove_tags = [dict(name='div', attrs={'class': ['opcionb', 'opcionb last', 'columna_noticia']}),
                   dict(name='span', attrs={'class': 'opcionesnoticia'})
                   ]

    def print_version(self, url):
        return url.replace('/default.asp?', '/print.asp?')

    def preprocess_html(self, soup):
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@@ -1,28 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Escrevinhador(BasicNewsRecipe):
    title = 'Blog Escrevinhador'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do Blog Escrevinhador'
    publisher = 'Rodrigo Viana'
    oldest_article = 5
    max_articles_per_feed = 20
    category = 'news, politics, Brazil'
    language = 'pt_BR'
    publication_type = 'news and politics portal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'Blog Escrevinhador', u'http://www.rodrigovianna.com.br/feed')]

    reverse_article_order = True

    remove_tags_after = [dict(name='div', attrs={'class': 'text'})]

    remove_tags = [
        dict(id='header'),
        dict(name='p', attrs={'class': 'tags'}),
        dict(name='div', attrs={'class': 'sociable'})
    ]
@@ -1,49 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class IDGNow(BasicNewsRecipe):
    title = 'IDG Now!'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do IDG Now!'
    oldest_article = 7
    max_articles_per_feed = 20
    encoding = 'utf8'
    publisher = 'Now!Digital Business Ltda.'
    category = 'technology, telecom, IT, Brazil'
    language = 'pt_BR'
    publication_type = 'technology portal'
    use_embedded_content = False
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    def get_article_url(self, article):
        link = article.get('link', None)
        if link is None:
            return article
        if link.split('/')[-1] == "story01.htm":
            link = link.split('/')[-2]
            # undo the feed's character escaping ('0B' -> '.', '0C' -> '/', ...)
            a = ['0B', '0C', '0D', '0E', '0F', '0G',
                 '0I', '0N', '0L0S', '0A', '0J3A']
            b = ['.', '/', '?', '-', '=', '&', '_', '.com', 'www.', '0', ':']
            for i in range(0, len(a)):
                link = link.replace(a[i], b[i])
            link = link.split('&')[-3]
            link = link.split('=')[1]
            link = link + "/IDGNoticiaPrint_view"
        return link

    feeds = [
        (u'Ultimas noticias', u'http://rss.idgnow.com.br/c/32184/f/499640/index.rss'),
        (u'Computa\xe7\xe3o Corporativa',
         u'http://rss.idgnow.com.br/c/32184/f/499643/index.rss'),
        (u'Carreira', u'http://rss.idgnow.com.br/c/32184/f/499644/index.rss'),
        (u'Computa\xe7\xe3o Pessoal',
         u'http://rss.idgnow.com.br/c/32184/f/499645/index.rss'),
        (u'Internet', u'http://rss.idgnow.com.br/c/32184/f/499646/index.rss'),
        (u'Mercado', u'http://rss.idgnow.com.br/c/32184/f/419982/index.rss'),
        (u'Seguran\xe7a',
         u'http://rss.idgnow.com.br/c/32184/f/499647/index.rss'),
        (u'Telecom e Redes',
         u'http://rss.idgnow.com.br/c/32184/f/499648/index.rss')
    ]

    reverse_article_order = True
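The parallel lists in get_article_url above undo the feed service's '0X' character escaping. A tiny standalone demonstration on a hypothetical escaped fragment, using a subset of the table:

    codes = ['0B', '0C']               # escape codes from the table above
    chars = ['.', '/']                 # their decoded characters
    frag = 'idgnow0Bcom0Bbr0Cnoticia'  # hypothetical escaped fragment
    for code, char in zip(codes, chars):
        frag = frag.replace(code, char)
    print(frag)  # -> idgnow.com.br/noticia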
3 file diffs suppressed because they are too large.
@@ -1,106 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html

__license__ = 'GPL v3'


class Nfcmag(BasicNewsRecipe):

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Nan Feng Chuang / South Reviews Magazine'
    description = '''
    South Reviews Magazine, established in 1985, is a Guangzhou-based political and
    economic biweekly. South Reviews enjoys a reputation for being fair and objective, with graceful
    narration and insightful expression among its readers, mostly government
    officials, economic leaders and intellectuals. It has been praised as “the No.1
    Political & Economic Magazine in China”.

    The US magazine Time described South Reviews as "a highbrow news magazine".
    Other international media organizations such as BBC and NHK have conducted
    tracking shots of South Reviews journalists, to record their unique value and
    special position in China’s media industry. Harvard-Yenching Library, Stanford
    University's East Asia Library and UC Berkeley Library have collections of the
    magazine since its first issue, taking them as an important source to
    understand China's economic and social reform.

    Since 2008, South Reviews has been committed to transforming into a
    research-based media organization. Most of its editors, reporters and
    contributors have remarkably strong academic backgrounds, coming from Peking
    University, Tsinghua University, London School of Economics and Political
    Science, the Chinese University of Hong Kong, Renmin University of China, and
    other well-known institutions. The magazine has established research divisions,
    including the State Policy Research Center and the Brand Promotion Research
    Center, working in cooperation with well-known academic institutions and
    providing valuable research reports for governments and companies.
    '''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False

    remove_tags = [dict(attrs={'class': ['side-left', 'side-right',
                                         'breadcrumbs', 'score', 'weboNav']}),
                   dict(attrs={'id': ['header', 'footer']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    current_issue_url = ""
    current_issue_cover = ""

    def parse_index(self):
        baseurl = 'http://www.nfcmag.com/'
        raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True)
        soup_start = html.fromstring(raw)

        els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine')
                                   and contains(@class, 'comBox')]
                                   //a[@href and not(@id) and not(child::img)]
                               """)
        for x in els:
            issueurl = x.get('href')
            if not issueurl.lower().startswith('http://'):
                issueurl = baseurl + issueurl
            break

        raw = self.index_to_soup(issueurl, raw=True)
        soup_issue = html.fromstring(raw)

        coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine')
                                       and contains(@class, 'comBox')]
                                       //img[@*] """)
        imgurl = coverimg[0].get('src')
        if not imgurl.lower().startswith('http://'):
            imgurl = baseurl + imgurl
        self.current_issue_cover = imgurl
        feeds = []

        sections = soup_issue.xpath("""//div[contains(@class, 'article-box')
                                       and contains(@class, 'comBox')] """)
        for sec in sections:
            pages = sec.xpath('.//h5')
            sec_title = sec.xpath('.//h4')[0].text_content()
            self.log('Found section:', sec_title)
            articles = []
            for x in pages:
                url = x.xpath('.//a')[0].get('href')
                if not url.lower().startswith('http://'):
                    url = baseurl + url
                url = url[:-5] + '-s.html'  # to print view
                title = x.text_content()
                articles.append({'title': title, 'url': url, 'date': None})

            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        return self.current_issue_cover
@@ -1,43 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe


class PortalR7(BasicNewsRecipe):
    title = 'Noticias R7'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias Portal R7'
    oldest_article = 2
    max_articles_per_feed = 20
    encoding = 'utf8'
    publisher = 'Rede Record'
    category = 'news, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    feeds = [
        (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'),
        (u'Economia', u'http://www.r7.com/data/rss/economia.xml'),
        (u'Internacional',
         u'http://www.r7.com/data/rss/internacional.xml'),
        (u'Tecnologia e Ci\xeancia',
         u'http://www.r7.com/data/rss/tecnologiaCiencia.xml')
    ]
    reverse_article_order = True

    keep_only_tags = [dict(name='div', attrs={'class': 'materia'})]
    remove_tags = [
        dict(id=['espalhe', 'report-erro']),
        dict(name='ul', attrs={'class': 'controles'}),
        dict(name='ul', attrs={'class': 'relacionados'}),
        dict(name='div', attrs={'class': 'materia_banner'}),
        dict(name='div', attrs={'class': 'materia_controles'})
    ]

    preprocess_regexps = [
        (re.compile(r'<div class="materia">.*<div class="materia_cabecalho">', re.DOTALL | re.IGNORECASE),
         lambda match: '<div class="materia"><div class="materia_cabecalho">')
    ]
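preprocess_regexps entries run over the raw HTML before it is parsed; the pattern above collapses everything between the opening of div.materia and the div.materia_cabecalho header. A small standalone run on hypothetical markup:

    import re

    html = '<div class="materia"><span>ads</span><div class="materia_cabecalho">headline</div></div>'
    pat = re.compile(r'<div class="materia">.*<div class="materia_cabecalho">',
                     re.DOTALL | re.IGNORECASE)
    print(pat.sub('<div class="materia"><div class="materia_cabecalho">', html))
    # -> <div class="materia"><div class="materia_cabecalho">headline</div></div>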
@@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.news import BasicNewsRecipe


class NoticiasUnB(BasicNewsRecipe):
    title = 'Noticias UnB'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias da UnB'
    oldest_article = 5
    max_articles_per_feed = 20
    category = 'news, educational, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'UnB Agência', u'http://www.unb.br/noticias/rss/noticias.rss')]

    reverse_article_order = True

    def print_version(self, url):
        return url.replace('http://', 'http://www.unb.br/noticias/print_email/imprimir.php?u=http://')