This commit is contained in:
Kovid Goyal 2020-03-30 08:08:00 +05:30
commit 47fbf3b885
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
20 changed files with 0 additions and 4097 deletions

View File

@ -1,21 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BBCArabic(BasicNewsRecipe):
    """Fetch the BBC Arabic Middle East feed, rendered right-to-left."""

    title = u'BBC Arabic Middle East'
    __author__ = 'logophile777'
    language = 'ar'
    oldest_article = 7
    max_articles_per_feed = 100
    # Let calibre's heuristic cleanup isolate the article body.
    auto_cleanup = True
    # Arabic is written right-to-left; force RTL layout on the whole page.
    extra_css = 'body { text-align: right; direction:rtl; } '

    # Strip the "emp-*" embedded-media placeholder containers.
    remove_tags = [
        {'class': ['emp-alt-handheld', 'emp-noflash',
                   'emp-flashlink', 'emp-alt-screen']}
    ]

    feeds = [
        (u'BBC Arabic Middle East',
         u'http://www.bbc.co.uk/arabic/middleeast/index.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly rendition of an article URL."""
        return '{}?print=1'.format(url)

View File

@ -1,46 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    """BBC news in simplified Chinese, built from the zhongwen RSS feeds."""

    title = u'BBC Chinese'
    __author__ = 'rty'
    __version__ = '1.0'
    language = 'zh'
    # BUG FIX: this attribute was misspelled 'pubisher', so calibre never
    # picked up the publisher metadata.
    publisher = 'British Broadcasting Corporation'
    description = 'BBC news in Chinese'
    category = 'News, Chinese'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
    # Device-local Droid fallback font so CJK glyphs render on e-ink readers.
    extra_css = '''
    @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
    body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
    h1 {font-family: 'DroidFont', serif;}\n
    .articledescription {font-family: 'DroidFont', serif;}
    '''

    feeds = [
        (u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
        (u'\u56fd\u9645\u65b0\u95fb',
         u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
        (u'\u4e24\u5cb8\u4e09\u5730',
         u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
        (u'\u91d1\u878d\u8d22\u7ecf',
         u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
        (u'\u7f51\u4e0a\u4e92\u52a8',
         u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
        (u'\u97f3\u89c6\u56fe\u7247',
         u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
        (u'\u5206\u6790\u8bc4\u8bba',
         u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
    ]

    # Headline, topic/summary paragraphs and the article body/date only.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='p', attrs={'class': ['primary-topic', 'summary']}),
        dict(name='div', attrs={'class': ['bodytext', 'datestamp']}),
    ]

View File

@ -1,44 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Alayn Gortazar <zutoin at gmail dot com>'
'''
www.berria.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Berria(BasicNewsRecipe):
    """Basque-language daily 'Berria', assembled from its section RSS feeds."""

    title = 'Berria'
    __author__ = 'Alayn Gortazar'
    description = 'Euskal Herriko euskarazko egunkaria'
    publisher = 'Berria'
    category = 'news, politics, sports, Basque Country'
    language = 'eu'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Berria_Logo.svg/400px-Berria_Logo.svg.png'

    # Article header ('goiburua') plus the body containers.
    keep_only_tags = [
        {'id': 'goiburua'},
        {'name': 'div', 'attrs': {'class': ['ber_ikus']}},
        {'name': 'section', 'attrs': {'class': 'ber_ikus'}},
    ]
    # Comment links ('iruzkinak') and sponsor boxes ('laguntzaileak').
    remove_tags = [
        {'name': 'a', 'attrs': {'class': 'iruzkinak'}},
        {'name': 'div', 'attrs': {'class': 'laguntzaileak'}},
    ]
    extra_css = '#goiburua{font-weight: bold} .zintiloa{font-size: small} .sarrera{color:#666} .titularra{font-size: x-large} .sarrera{font-weight: bold} .argazoin{color:#666; font-size: small}'  # noqa

    feeds = [
        (u'Edizioa jarraia', u'http://berria.info/rss/ediziojarraia.xml'),
        (u'Iritzia', u'http://berria.info/rss/iritzia.xml'),
        (u'Euskal Herria', u'http://berria.info/rss/euskalherria.xml'),
        (u'Ekonomia', u'http://berria.info/rss/ekonomia.xml'),
        (u'Mundua', u'http://berria.info/rss/mundua.xml'),
        (u'Kirola', u'http://berria.info/rss/kirola.xml'),
        (u'Plaza', u'http://berria.info/rss/plaza.xml'),
    ]

View File

@ -1,20 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BlogdaCidadania(BasicNewsRecipe):
    """Recent posts from the Brazilian politics blog 'Blog da Cidadania'."""

    title = 'Blog da Cidadania'
    description = 'Posts do Blog da Cidadania'
    __author__ = 'Diniz Bortolotto'
    publisher = 'Eduardo Guimaraes'
    category = 'politics, Brazil'
    language = 'pt_BR'
    publication_type = 'politics portal'
    encoding = 'utf8'
    oldest_article = 7
    max_articles_per_feed = 50
    # Present posts oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [
        (u'Blog da Cidadania', u'http://www.blogcidadania.com.br/feed/'),
    ]

View File

@ -1,90 +0,0 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
__license__ = 'GPL v3'
class Caijing(BasicNewsRecipe):
    '''based on the recipe wrote by Eric Chen at 2011'''

    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Caijing Magazine'
    description = '''
Founded in 1998, the fortnightly CAIJING Magazine has firmly established
itself as a news authority and leading voice for business and financial
issues in China.
CAIJING Magazine closely tracks the most important aspects of China's
economic reforms, developments and policy changes, as well as major events
in the capital markets. It also offers a broad international perspective
through first-hand reporting on international political and economic
issues.
CAIJING Magazine is China's most widely read business and finance magazine,
with a circulation of 225,000 per issue. It boasts top-level readers from
government, business and academic circles.'''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'Caijing Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    # Full articles require a subscriber login (see get_browser below).
    needs_subscription = True

    # Site chrome: navigation, logos, share widgets, sidebars, footers.
    remove_tags = [dict(attrs={'class': ['head_nav', 'mcont_logo', 'header',
                   'bottom', 'footer', 'magazine_ipad', 'cjartShare', 'ar_about',
                   'main_rt', 'mcont_nav', 'new']}),
                   dict(attrs={'id': ['articlePl']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    # Filled in as a side effect of parse_index(); read by get_cover_url().
    current_issue_url = ""
    current_issue_cover = ""

    def get_browser(self):
        """Return a browser, logged in when credentials are configured."""
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Locate the latest issue and build one feed per magazine section."""
        # The magazine landing page redirects via an inline script; extract
        # the quoted target URL from the first <script> block.
        soup_start = self.index_to_soup('http://magazine.caijing.com.cn/')
        jumpurl = soup_start.find('script').contents[0].split()
        for line in jumpurl:
            if 'http' in line.lower():
                issuesurl = line.split('"')[1]
                break
        # NOTE(review): issuesurl is unbound if no script token contains
        # 'http' — assumes the redirect page always provides one; confirm.
        soup_issues = self.index_to_soup(issuesurl)
        # find the latest issue
        div = soup_issues.find('div', attrs={'class': 'fmcon'})
        # Local variable intentionally shadows the class attribute of the
        # same name; only the cover URL is stored back on self.
        current_issue_url = div.find('a', href=True)['href']
        soup = self.index_to_soup(current_issue_url)
        coverimg = soup.find('div', {'class': 'zzfm_img'})
        self.current_issue_cover = coverimg.find('img')['src']
        feeds = []
        # Section containers/headings come in two alternating class-name
        # families (fmwz_* / zzlm_*), hence the combined regexes.
        for section in soup.findAll('div',
                attrs={'class': re.compile(r'(fmwz_ml|zzlm_nr)2?$')}):
            section_title = self.tag_to_string(section.find('div',
                attrs={'class': re.compile(r'(lmnav_bt|zzlm_bt)1?$')}))
            self.log('Found section:', section_title)
            articles = []
            for post in section.findAll('div',
                    attrs={'class': re.compile(r'(fmwz_bt|zzlm_nr_bt)')}):
                title = self.tag_to_string(post)
                url = post.find('a')['href']
                articles.append({'title': title, 'url': url, 'date': None})
            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image discovered during parse_index()."""
        return self.current_issue_cover

View File

@ -1,16 +0,0 @@
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311839910(BasicNewsRecipe):
    """Brazilian magazine 'Caros Amigos', pulled from its RSS feed."""

    title = u'Caros Amigos'
    __author__ = 'Pablo Aldama'
    language = 'pt_BR'
    oldest_article = 20
    max_articles_per_feed = 100

    feeds = [
        (u'Caros Amigos',
         u'http://carosamigos.terra.com.br/index2/index.php?format=feed&type=rss'),
    ]

    # Keep only the blog post containers; drop the share widget.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['blog']}},
        {'name': 'div', 'attrs': {'class': ['blogcontent']}},
    ]
    remove_tags = [{'name': 'div', 'attrs': {'class': 'addtoany'}}]

View File

@ -1,29 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1380852962(BasicNewsRecipe):
    """Brazilian weekly 'Carta Capital', one feed per section."""

    title = u'Carta Capital'
    __author__ = 'Erico Lisboa'
    language = 'pt_BR'
    oldest_article = 15
    max_articles_per_feed = 100
    # Rely on calibre's heuristic cleanup to extract the article body.
    auto_cleanup = True
    use_embedded_content = False

    feeds = [
        (u'Pol\xedtica',
         u'http://www.cartacapital.com.br/politica/politica/rss'),
        (u'Economia',
         u'http://www.cartacapital.com.br/economia/economia/atom.xml'),
        (u'Sociedade',
         u'http://www.cartacapital.com.br/sociedade/sociedade/atom.xml'),
        (u'Internacional',
         u'http://www.cartacapital.com.br/internacional/internacional/atom.xml'),
        (u'Tecnologia',
         u'http://www.cartacapital.com.br/tecnologia/tecnologia/atom.xml'),
        (u'Cultura',
         u'http://www.cartacapital.com.br/cultura/cultura/atom.xml'),
        (u'Sa\xfade',
         u'http://www.cartacapital.com.br/saude/saude/atom.xml'),
        (u'Educa\xe7\xe3o',
         u'http://www.cartacapital.com.br/educacao/educacao/atom.xml'),
    ]

View File

@ -1,69 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277228948(BasicNewsRecipe):
    """China Press USA: overseas Chinese newspaper, three RSS channels."""

    title = u'China Press USA'
    __author__ = 'rty'
    __version__ = '1.0'
    language = 'zh'
    # BUG FIX: the attribute was misspelled 'pubisher', so calibre never
    # saw the publisher metadata.
    publisher = 'www.chinapressusa.com'
    description = 'Overseas Chinese Network Newspaper in the USA'
    category = 'News in Chinese, USA'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'UTF-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://www.chinapressusa.com/common/images/logo.gif'
    # Device-local Droid fallback font so CJK glyphs render on e-ink readers.
    extra_css = '''
    @font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
    body {
    margin-right: 8pt;
    font-family: 'DroidFont', serif;}
    h1 {font-family: 'DroidFont', serif, sans-serif}
    .show {font-family: 'DroidFont', serif, sans-serif}
    '''

    feeds = [
        (u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
        (u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
        (u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
    ]
    # The article body lives in div.show; the timestamp box is noise.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'show'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'time'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'bank17'}),
    ]

    def append_page(self, soup, appendtag, position):
        """Recursively fetch the remaining pages of a multi-page article and
        splice their body text into *appendtag* at *position*."""
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        if pager:
            # NOTE(review): self.INDEX is never defined on this class, so a
            # paginated article would raise AttributeError here — confirm
            # whether the site still paginates and supply the base URL if so.
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class': 'show'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Stamp language/charset metadata, strip inline styles and merge
        any continuation pages into the article body."""
        mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'id': 'displaypagenum'})
        if pager:
            pager.extract()
        return soup

View File

@ -1,73 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheCND(BasicNewsRecipe):
    """Daily articles from CND (cnd.org), grouped under the newest date."""

    title = 'CND'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables': True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align': 'right'}), dict(name='img', attrs={
        'src': 'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                          (re.compile('<table width.*?</table>',
                                      re.DOTALL), lambda m: ''),
                          ]

    def print_version(self, url):
        """Map both article URL flavours onto their print-view counterparts."""
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        """Scrape the front page and return one feed for the newest date."""
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = {}
        for a in soup.findAll('a', attrs={'target': '_cnd'}):
            url = a['href']
            if url.find('article.php') < 0:
                continue
            if url.startswith('/'):
                url = 'http://cnd.org' + url
            title = self.tag_to_string(a)
            self.log('\tFound article: ', title, 'at', url)
            date = a.nextSibling
            # BUG FIX: test for a missing sibling before the regex; the
            # original ran re.search on it first, raising TypeError when
            # the anchor has no trailing text node.
            if date is None:
                continue
            # Entries marked 'cm' belong to the weekly edition; skip them.
            if re.search('cm', date):
                continue
            if len(date) > 2:
                if date not in articles:
                    articles[date] = []
                articles[date].append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('\t\tAppend to : ', date)
        # BUG FIX: guard the empty case; sorted([]).pop() raised IndexError
        # when no dated article was found.
        if articles:
            mostCurrent = sorted(articles).pop()
            self.title = 'CND ' + mostCurrent
            feeds.append((self.title, articles[mostCurrent]))
        return feeds

    def populate_article_metadata(self, article, soup, first):
        # Debug logging only; no metadata is actually modified here.
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))

View File

@ -1,74 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2010, Derek Liang <Derek.liang.ca @@@at@@@ gmail.com>'
'''
cnd.org
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheCND(BasicNewsRecipe):
    """Weekly ('cm'-dated) articles from CND (cnd.org), one feed per date."""

    title = 'CND Weekly'
    __author__ = 'Derek Liang'
    description = ''
    INDEX = 'http://cnd.org'
    language = 'zh'
    conversion_options = {'linearize_tables': True}

    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(name='table', attrs={'align': 'right'}), dict(name='img', attrs={
        'src': 'http://my.cnd.org/images/logo.gif'}), dict(name='hr', attrs={}), dict(name='small', attrs={})]
    no_stylesheets = True
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
                          (re.compile('<table width.*?</table>',
                                      re.DOTALL), lambda m: ''),
                          ]

    def print_version(self, url):
        """Map both article URL flavours onto their print-view counterparts."""
        if url.find('news/article.php') >= 0:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/news/print.php?storyid", url)
        else:
            return re.sub("^[^=]*", "http://my.cnd.org/modules/wfsection/print.php?articleid", url)

    def parse_index(self):
        """Scrape the front page; keep only weekly ('cm') entries, one feed
        per date, newest first."""
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        articles = {}
        for a in soup.findAll('a', attrs={'target': '_cnd'}):
            url = a['href']
            if url.find('article.php') < 0:
                continue
            if url.startswith('/'):
                url = 'http://cnd.org' + url
            title = self.tag_to_string(a)
            date = a.nextSibling
            # BUG FIX: test for a missing sibling before the regex; the
            # original ran re.search on it first, raising TypeError when
            # the anchor has no trailing text node.
            if date is None:
                continue
            # Only 'cm'-marked entries are part of the weekly edition.
            if not re.search('cm', date):
                continue
            self.log('\tFound article: ', title, 'at', url, '@', date)
            if len(date) > 2:
                if date not in articles:
                    articles[date] = []
                articles[date].append(
                    {'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('\t\tAppend to : ', date)
        sorted_articles = sorted(articles)
        # Pop from the end of the sorted keys so feeds run newest-first.
        while sorted_articles:
            mostCurrent = sorted_articles.pop()
            # NOTE(review): self.title is reassigned on every iteration and
            # ends up naming the oldest batch — confirm this is intended.
            self.title = 'CND ' + mostCurrent
            feeds.append((self.title, articles[mostCurrent]))
        return feeds

    def populate_article_metadata(self, article, soup, first):
        # Debug logging only; no metadata is actually modified here.
        header = soup.find('h3')
        self.log('header: ' + self.tag_to_string(header))

View File

@ -1,76 +0,0 @@
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
dnevniavaz.ba
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag via whichever BeautifulSoup API *soup* supports."""
    # Modern BeautifulSoup exposes a new_tag factory on the soup object;
    # fall back to constructing a Tag directly for the legacy API.
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class DnevniAvaz(BasicNewsRecipe):
    """Bosnian daily 'Dnevni Avaz' (latest and most-popular feeds)."""

    title = 'Dnevni Avaz'
    __author__ = 'Darko Miletic'
    description = 'Latest news from Bosnia'
    publisher = 'Dnevni Avaz'
    category = 'news, politics, Bosnia and Herzegovina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
    lang = 'bs-BA'
    language = 'bs'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'  # noqa

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
    }

    # Normalise U+0110 (Dj) to U+00D0 before parsing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Title, lead, date, body and author containers only.
    keep_only_tags = [dict(name='div', attrs={'id': [
        'fullarticle-title', 'fullarticle-leading', 'fullarticle-date', 'fullarticle-text', 'articleauthor']})]
    remove_tags = [dict(name=['object', 'link', 'base'])]

    feeds = [
        (u'Najnovije', u'http://www.dnevniavaz.ba/rss/novo'),
        (u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno'),
    ]

    def replace_tagname(self, soup, tagname, tagid, newtagname):
        """Rename the element matching (tagname, id=tagid), when present."""
        target = soup.find(tagname, attrs={'id': tagid})
        if target:
            target.name = newtagname

    def preprocess_html(self, soup):
        """Stamp language/charset metadata and promote the title, lead and
        date containers to real heading elements."""
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
        soup.head.insert(0, mlang)
        soup.head.insert(1, mcharset)
        for div_id, heading in (('fullarticle-title', 'h1'),
                                ('fullarticle-leading', 'h3'),
                                ('fullarticle-date', 'h5')):
            self.replace_tagname(soup, 'div', div_id, heading)
        return self.adeify_images(soup)

View File

@ -1,73 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '30 October 2010, Jordi Balcells based on an earlier recipe by Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.cat
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a tag via whichever BeautifulSoup API *soup* supports."""
    # Prefer the modern soup.new_tag factory; fall back to the legacy
    # Tag constructor when it is not available.
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class ElPeriodico_cat(BasicNewsRecipe):
    """Catalan-language edition of El Periodico de Catalunya."""

    title = 'El Periodico de Catalunya'
    __author__ = 'Jordi Balcells/Darko Miletic'
    description = 'Noticies des de Catalunya'
    publisher = 'elperiodico.cat'
    category = 'news, politics, Spain, Catalunya'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    delay = 1
    encoding = 'cp1252'
    language = 'ca'

    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher
    ]
    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [
        (u'Portada', u'http://www.elperiodico.cat/ca/rss/rss_portada.xml'),
        (u'Internacional', u'http://www.elperiodico.cat/ca/rss/internacional/rss.xml'),
        (u'Societat', u'http://www.elperiodico.cat/ca/rss/societat/rss.xml'),
        (u'Ci\xe8ncia i tecnologia',
         u'http://www.elperiodico.cat/ca/rss/ciencia-i-tecnologia/rss.xml'),
        (u'Esports', u'http://www.elperiodico.cat/ca/rss/esports/rss.xml'),
        (u'Gent', u'http://www.elperiodico.cat/ca/rss/gent/rss.xml'),
        (u'Opini\xf3', u'http://www.elperiodico.cat/ca/rss/opinio/rss.xml'),
        (u'Pol\xedtica', u'http://www.elperiodico.cat/ca/rss/politica/rss.xml'),
        (u'Barcelona', u'http://www.elperiodico.cat/ca/rss/barcelona/rss.xml'),
        (u'Economia', u'http://www.elperiodico.cat/ca/rss/economia/rss.xml'),
        (u'Cultura i espectacles',
         u'http://www.elperiodico.cat/ca/rss/cultura-i-espectacles/rss.xml'),
        (u'Tele', u'http://www.elperiodico.cat/ca/rss/tele/rss.xml'),
    ]

    # Headline and full-article containers; drop sharing/sidebar chrome.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'titularnoticia'}},
        {'name': 'div', 'attrs': {'class': 'noticia_completa'}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['opcionb', 'opcionb last', 'columna_noticia']}},
        {'name': 'span', 'attrs': {'class': 'opcionesnoticia'}},
    ]

    def print_version(self, url):
        """Point calibre at the printer-friendly page."""
        return url.replace('/default.asp?', '/print.asp?')

    def preprocess_html(self, soup):
        """Declare the charset explicitly and strip inline styling."""
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, mcharset)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -1,28 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Escrevinhador(BasicNewsRecipe):
    """Posts from Rodrigo Viana's 'Escrevinhador' blog."""

    title = 'Blog Escrevinhador'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do Blog Escrevinhador'
    publisher = 'Rodrigo Viana'
    category = 'news, politics, Brazil'
    language = 'pt_BR'
    publication_type = 'news and politics portal'
    oldest_article = 5
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    # Present posts oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [(u'Blog Escrevinhador', u'http://www.rodrigovianna.com.br/feed')]

    # Cut everything after the post body; drop header, tag list and
    # social-sharing widgets.
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'text'}}]
    remove_tags = [
        {'id': 'header'},
        {'name': 'p', 'attrs': {'class': 'tags'}},
        {'name': 'div', 'attrs': {'class': 'sociable'}},
    ]

View File

@ -1,49 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class IDGNow(BasicNewsRecipe):
    """Brazilian technology portal IDG Now!, via its Feedburner feeds."""

    title = 'IDG Now!'
    __author__ = 'Diniz Bortolotto'
    description = 'Posts do IDG Now!'
    oldest_article = 7
    max_articles_per_feed = 20
    encoding = 'utf8'
    publisher = 'Now!Digital Business Ltda.'
    category = 'technology, telecom, IT, Brazil'
    language = 'pt_BR'
    publication_type = 'technology portal'
    use_embedded_content = False
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    def get_article_url(self, article):
        """Decode the Feedburner-obfuscated link and return the print view.

        Feedburner encodes the real URL in the last-but-one path segment
        using '0X' escape codes; each code is mapped back to the character
        it stands for before the target URL is extracted.
        """
        link = article.get('link', None)
        if link is None:
            return article
        if link.split('/')[-1] == "story01.htm":
            link = link.split('/')[-2]
            # IDIOM FIX: iterate the code/char pairs with zip instead of
            # indexing two parallel lists via range(len(...)).
            # Order matters: multi-char codes ('0L0S', '0J3A') must be
            # replaced in this sequence, before/after their prefixes.
            encodings = ['0B', '0C', '0D', '0E', '0F', '0G',
                         '0I', '0N', '0L0S', '0A', '0J3A']
            characters = ['.', '/', '?', '-', '=', '&', '_', '.com', 'www.', '0', ':']
            for code, char in zip(encodings, characters):
                link = link.replace(code, char)
            link = link.split('&')[-3]
            link = link.split('=')[1]
            link = link + "/IDGNoticiaPrint_view"
        return link

    feeds = [
        (u'Ultimas noticias', u'http://rss.idgnow.com.br/c/32184/f/499640/index.rss'),
        (u'Computa\xe7\xe3o Corporativa',
         u'http://rss.idgnow.com.br/c/32184/f/499643/index.rss'),
        (u'Carreira', u'http://rss.idgnow.com.br/c/32184/f/499644/index.rss'),
        (u'Computa\xe7\xe3o Pessoal',
         u'http://rss.idgnow.com.br/c/32184/f/499645/index.rss'),
        (u'Internet', u'http://rss.idgnow.com.br/c/32184/f/499646/index.rss'),
        (u'Mercado', u'http://rss.idgnow.com.br/c/32184/f/419982/index.rss'),
        (u'Seguran\xe7a',
         u'http://rss.idgnow.com.br/c/32184/f/499647/index.rss'),
        (u'Telecom e Redes',
         u'http://rss.idgnow.com.br/c/32184/f/499648/index.rss')
    ]
    reverse_article_order = True

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,106 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
from lxml import html
__license__ = 'GPL v3'
class Nfcmag(BasicNewsRecipe):
    # Recipe for the current issue of Nan Feng Chuang / South Reviews.
    __author__ = '2014, Chen Wei <weichen302@gmx.com>'
    title = 'Nan Feng Chuang / South Reviews Magazine'
    description = '''
South Reviews Magazine, established in 1985, is a Guangzhou-based political and
economic biweekly. South Reviews enjoys a reputation of being fair and objective, with graceful
narration, insightful expression among its readers, mostly government
officials, economic leaders and intellectuals. It has been praised as “the No.1
Political& Economical Magazine in China”.
The US magazine Time described South Reviews as "a highbrow news magazine".
Other international media organizations such as BBC and NHK have conducted
tracking shots of South Reviews journalists, to record their unique value
special position in Chinas media industry. Harvard-Yenching Library, Stanford
University's East Asia Library and UC Berkeley Library have collections of the
magazine since its first issue, taking them as an important source to
understand China's economic and social reform.
Since 2008, South Reviews has been committed to transforming into a
research-based media organization. Most of its editors, reporters and
contributors have remarkably strong academic backgrounds, coming from Peking
University, Tsinghua University, London School of Economics and Political
Science, the Chinese University of Hong Kong, Renmin University of China, and
other well-known institutions. The magazine has established research divisions,
including the State Policy Research Center and the Brand Promotion Research
Center, working in cooperation with well-known academic institutions and
providing valuable research reports for governments and companies.
'''
    language = 'zh'
    encoding = 'UTF-8'
    publisher = 'South Reviews Magazine'
    publication_type = 'magazine'
    category = 'news, Business, China'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = False

    # Site chrome: sidebars, breadcrumbs, scores, nav, header/footer.
    remove_tags = [dict(attrs={'class': ['side-left', 'side-right',
                   'breadcrumbs', 'score', 'weboNav']}),
                   dict(attrs={'id': ['header', 'footer']}),
                   dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    # Filled in as a side effect of parse_index(); read by get_cover_url().
    current_issue_url = ""
    current_issue_cover = ""

    def parse_index(self):
        """Find the latest issue, record its cover, and build one feed per
        magazine section (article URLs rewritten to their print views)."""
        baseurl = 'http://www.nfcmag.com/'
        raw = self.index_to_soup('http://www.nfcmag.com/magazine', raw=True)
        soup_start = html.fromstring(raw)
        # The first plain link (no id, no image child) inside the
        # 'lastest-magazine' box points to the newest issue.
        els = soup_start.xpath("""//div[contains(@class, 'lastest-magazine')
                                  and contains(@class, 'comBox')]
                                  //a[@href and not(@id) and not(child::img)]
                               """)
        for x in els:
            issueurl = x.get('href')
            if not issueurl.lower().startswith('http://'):
                issueurl = baseurl + issueurl
            break
        # NOTE(review): issueurl is unbound if the xpath matches nothing —
        # assumes the magazine page always lists the latest issue; confirm.
        raw = self.index_to_soup(issueurl, raw=True)
        soup_issue = html.fromstring(raw)
        # First image in the same box is the issue cover.
        coverimg = soup_issue.xpath("""//div[contains(@class, 'lastest-magazine')
                                       and contains(@class, 'comBox')]
                                       //img[@*] """)
        imgurl = coverimg[0].get('src')
        if not imgurl.lower().startswith('http://'):
            imgurl = baseurl + imgurl
        self.current_issue_cover = imgurl
        feeds = []
        # Each 'article-box' holds one section: <h4> heading, <h5> entries.
        sections = soup_issue.xpath("""//div[contains(@class, 'article-box')
                                       and contains(@class, 'comBox')] """)
        for sec in sections:
            pages = sec.xpath('.//h5')
            sec_title = sec.xpath('.//h4')[0].text_content()
            self.log('Found section:', sec_title)
            articles = []
            for x in pages:
                url = x.xpath('.//a')[0].get('href')
                if not url.lower().startswith('http://'):
                    url = baseurl + url
                url = url[:-5] + '-s.html'  # to print view
                title = x.text_content()
                articles.append({'title': title, 'url': url, 'date': None})
            if articles:
                feeds.append((sec_title, articles))
        return feeds

    def get_cover_url(self):
        """Return the cover image discovered during parse_index()."""
        return self.current_issue_cover

View File

@ -1,43 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class PortalR7(BasicNewsRecipe):
    """News from the Brazilian portal R7 (Rede Record)."""

    title = 'Noticias R7'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias Portal R7'
    publisher = 'Rede Record'
    category = 'news, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    encoding = 'utf8'
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']
    # Present articles oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [
        (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'),
        (u'Economia', u'http://www.r7.com/data/rss/economia.xml'),
        (u'Internacional',
         u'http://www.r7.com/data/rss/internacional.xml'),
        (u'Tecnologia e Ci\xeancia',
         u'http://www.r7.com/data/rss/tecnologiaCiencia.xml'),
    ]

    # Keep the article container; drop share boxes, controls and banners.
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'materia'}}]
    remove_tags = [
        {'id': ['espalhe', 'report-erro']},
        {'name': 'ul', 'attrs': {'class': 'controles'}},
        {'name': 'ul', 'attrs': {'class': 'relacionados'}},
        {'name': 'div', 'attrs': {'class': 'materia_banner'}},
        {'name': 'div', 'attrs': {'class': 'materia_controles'}},
    ]

    # Collapse everything between the article container and its header.
    preprocess_regexps = [
        (re.compile(r'<div class="materia">.*<div class="materia_cabecalho">', re.DOTALL | re.IGNORECASE),
         lambda match: '<div class="materia"><div class="materia_cabecalho">')
    ]

View File

@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class NoticiasUnB(BasicNewsRecipe):
    """News from the University of Brasilia (UnB Agência feed)."""

    title = 'Noticias UnB'
    __author__ = 'Diniz Bortolotto'
    description = 'Noticias da UnB'
    category = 'news, educational, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'
    oldest_article = 5
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    # Present articles oldest-first instead of the feed's newest-first order.
    reverse_article_order = True

    feeds = [(u'UnB Agência', u'http://www.unb.br/noticias/rss/noticias.rss')]

    def print_version(self, url):
        """Rewrite an article URL to the print/e-mail rendition."""
        prefix = 'http://www.unb.br/noticias/print_email/imprimir.php?u=http://'
        return url.replace('http://', prefix)