Perform PEP8 compliance checks on the entire codebase

Some bits of PEP 8 are turned off via setup.cfg
This commit is contained in:
Kovid Goyal 2016-07-29 11:04:04 +05:30
parent 643977ffa6
commit 567040ee1e
1881 changed files with 49336 additions and 46525 deletions

View File

@ -1,13 +1,13 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
## ##
## Title: Diario 10minutos.com.uy News and Sports Calibre Recipe # Title: Diario 10minutos.com.uy News and Sports Calibre Recipe
## Contact: Carlos Alves - <carlos@carlosalves.info> # Contact: Carlos Alves - <carlos@carlosalves.info>
## ##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html # License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright: Carlos Alves - <carlos@carlosalves.info> # Copyright: Carlos Alves - <carlos@carlosalves.info>
## ##
## Written: September 2013 # Written: September 2013
## Last Edited: 2016-01-11 # Last Edited: 2016-01-11
## ##
__license__ = 'GPL v3' __license__ = 'GPL v3'
@ -18,6 +18,7 @@ __author__ = '2016, Carlos Alves <carlos@carlosalves.info>'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe): class General(BasicNewsRecipe):
title = '10minutos' title = '10minutos'
__author__ = 'Carlos Alves' __author__ = 'Carlos Alves'

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
## ##
## Last Edited: 2016-01-11 Carlos Alves <carlos@carlosalves.info> # Last Edited: 2016-01-11 Carlos Alves <carlos@carlosalves.info>
## ##
__license__ = 'GPL v3' __license__ = 'GPL v3'
@ -11,6 +11,7 @@ __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe): class Noticias(BasicNewsRecipe):
title = '180.com.uy' title = '180.com.uy'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
@ -50,9 +51,7 @@ class Noticias(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
pass pass
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup

View File

@ -54,7 +54,8 @@ class E1843(BasicNewsRecipe):
r = div.find(**classes('article-rubric')) r = div.find(**classes('article-rubric'))
if r is not None: if r is not None:
desc = self.tag_to_string(r) desc = self.tag_to_string(r)
articles.append({'title':title, 'url':url, 'description':desc}) articles.append(
{'title': title, 'url': url, 'description': desc})
if current_section and articles: if current_section and articles:
ans.append((current_section, articles)) ans.append((current_section, articles))

View File

@ -10,6 +10,7 @@ www.20minutos.es
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294946868(BasicNewsRecipe): class AdvancedUserRecipe1294946868(BasicNewsRecipe):
title = u'20 Minutos new' title = u'20 Minutos new'
@ -32,23 +33,15 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':['content','vinetas',]}) dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa
,dict(name='div', attrs={'class':['boxed','description','lead','article-content','cuerpo estirar']})
,dict(name='span', attrs={'class':['photo-bar']})
,dict(name='ul', attrs={'class':['article-author']})
] ]
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']}) remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
remove_tags_after = dict(name='div' , attrs={'class':['related-news','col']}) remove_tags_after = dict(
name='div', attrs={'class': ['related-news', 'col']})
remove_tags = [ remove_tags = [
dict(name='ol', attrs={'class':['navigation',]}) dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa
,dict(name='span', attrs={'class':['action']})
,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
,dict(name='ul', attrs={'id':['site-links']})
,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
] ]
extra_css = """ extra_css = """
@ -57,23 +50,25 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
""" """
preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')] preprocess_regexps = [(re.compile(
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
feeds = [ feeds = [
(u'Portada' , u'http://www.20minutos.es/rss/')
,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/') (u'Portada', u'http://www.20minutos.es/rss/'),
,(u'Internacional' , u'http://www.20minutos.es/rss/internacional/') (u'Nacional', u'http://www.20minutos.es/rss/nacional/'),
,(u'Economia' , u'http://www.20minutos.es/rss/economia/') (u'Internacional', u'http://www.20minutos.es/rss/internacional/'),
,(u'Deportes' , u'http://www.20minutos.es/rss/deportes/') (u'Economia', u'http://www.20minutos.es/rss/economia/'),
,(u'Tecnologia' , u'http://www.20minutos.es/rss/tecnologia/') (u'Deportes', u'http://www.20minutos.es/rss/deportes/'),
,(u'Gente - TV' , u'http://www.20minutos.es/rss/gente-television/') (u'Tecnologia', u'http://www.20minutos.es/rss/tecnologia/'),
,(u'Motor' , u'http://www.20minutos.es/rss/motor/') (u'Gente - TV', u'http://www.20minutos.es/rss/gente-television/'),
,(u'Salud' , u'http://www.20minutos.es/rss/belleza-y-salud/') (u'Motor', u'http://www.20minutos.es/rss/motor/'),
,(u'Viajes' , u'http://www.20minutos.es/rss/viajes/') (u'Salud', u'http://www.20minutos.es/rss/belleza-y-salud/'),
,(u'Vivienda' , u'http://www.20minutos.es/rss/vivienda/') (u'Viajes', u'http://www.20minutos.es/rss/viajes/'),
,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/') (u'Vivienda', u'http://www.20minutos.es/rss/vivienda/'),
,(u'Cine' , u'http://www.20minutos.es/rss/cine/') (u'Empleo', u'http://www.20minutos.es/rss/empleo/'),
,(u'Musica' , u'http://www.20minutos.es/rss/musica/') (u'Cine', u'http://www.20minutos.es/rss/cine/'),
,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/') (u'Musica', u'http://www.20minutos.es/rss/musica/'),
,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/') (u'Vinetas', u'http://www.20minutos.es/rss/vinetas/'),
(u'Comunidad20', u'http://www.20minutos.es/rss/zona20/')
] ]

View File

@ -6,6 +6,7 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Minutes(BasicNewsRecipe): class Minutes(BasicNewsRecipe):
title = '20 minutes' title = '20 minutes'

View File

@ -6,10 +6,11 @@ www.20minutos.es
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class t20Minutos(BasicNewsRecipe): class t20Minutos(BasicNewsRecipe):
title = '20 Minutos' title = '20 Minutos'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas' description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas' # noqa
publisher = '20 Minutos Online SL' publisher = '20 Minutos Online SL'
category = 'news, politics, Spain' category = 'news, politics, Spain'
oldest_article = 2 oldest_article = 2
@ -27,23 +28,21 @@ class t20Minutos(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
remove_tags = [dict(attrs={'class': 'mf-viral'})] remove_tags = [dict(attrs={'class': 'mf-viral'})]
remove_attributes = ['border'] remove_attributes = ['border']
feeds = [ feeds = [
(u'Principal' , u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
,(u'Cine' , u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss') (u'Principal', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
,(u'Internacional' , u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss') (u'Cine', u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
,(u'Deportes' , u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss') (u'Internacional', u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
,(u'Nacional' , u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss') (u'Deportes', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
,(u'Economia' , u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss') (u'Nacional', u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
,(u'Tecnologia' , u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss') (u'Economia', u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
(u'Tecnologia', u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -62,7 +61,6 @@ class t20Minutos(BasicNewsRecipe):
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img'):
if not item.has_key('alt'): if not item.has_key('alt'): # noqa
item['alt'] = 'image' item['alt'] = 'image'
return soup return soup

View File

@ -11,6 +11,7 @@ import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
class Cro24Sata(BasicNewsRecipe): class Cro24Sata(BasicNewsRecipe):
title = '24 Sata - Hr' title = '24 Sata - Hr'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -27,29 +28,28 @@ class Cro24Sata(BasicNewsRecipe):
lang = 'hr-HR' lang = 'hr-HR'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [ remove_tags = [
dict(name=['object','link','embed']) dict(name=['object', 'link', 'embed']), dict(
,dict(name='table', attrs={'class':'enumbox'}) name='table', attrs={'class': 'enumbox'})
] ]
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')] feeds = [(u'Najnovije Vijesti',
u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) mlang = Tag(soup, 'meta', [
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
@ -58,4 +58,3 @@ class Cro24Sata(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url + '&action=ispis' return url + '&action=ispis'

View File

@ -10,6 +10,7 @@ __copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Ser24Sata(BasicNewsRecipe): class Ser24Sata(BasicNewsRecipe):
title = '24 Sata - Sr' title = '24 Sata - Sr'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -29,10 +30,7 @@ class Ser24Sata(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher': publisher
, 'language' : language
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -3,6 +3,7 @@
from __future__ import unicode_literals, division, absolute_import, print_function from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1438446837(BasicNewsRecipe): class AdvancedUserRecipe1438446837(BasicNewsRecipe):
title = '3DNews: Daily Digital Digest' title = '3DNews: Daily Digital Digest'
__author__ = 'bugmen00t' __author__ = 'bugmen00t'
@ -17,30 +18,48 @@ class AdvancedUserRecipe1438446837(BasicNewsRecipe):
max_articles_per_feed = 60 max_articles_per_feed = 60
feeds = [ feeds = [
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware', 'http://www.3dnews.ru/news/rss/'), ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware',
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software', 'http://www.3dnews.ru/software-news/rss/'), 'http://www.3dnews.ru/news/rss/'),
('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438', 'http://www.3dnews.ru/smart-things/rss/'), ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software',
('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430', 'http://www.3dnews.ru/editorial/rss/'), 'http://www.3dnews.ru/software-news/rss/'),
('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c', 'http://www.3dnews.ru/cpu/rss/'), ('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438',
('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b', 'http://www.3dnews.ru/motherboard/rss/'), 'http://www.3dnews.ru/smart-things/rss/'),
('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430',
'http://www.3dnews.ru/editorial/rss/'),
('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c',
'http://www.3dnews.ru/cpu/rss/'),
('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b',
'http://www.3dnews.ru/motherboard/rss/'),
('\u041a\u043e\u0440\u043f\u0443\u0441\u0430, \u0411\u041f \u0438 \u043e\u0445\u043b\u0430\u0436\u0434\u0435\u043d\u0438\u0435', ('\u041a\u043e\u0440\u043f\u0443\u0441\u0430, \u0411\u041f \u0438 \u043e\u0445\u043b\u0430\u0436\u0434\u0435\u043d\u0438\u0435',
'http://www.3dnews.ru/cooling/rss/'), 'http://www.3dnews.ru/cooling/rss/'),
('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b', 'http://www.3dnews.ru/video/rss/'), ('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b',
('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b', 'http://www.3dnews.ru/display/rss/'), 'http://www.3dnews.ru/video/rss/'),
('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438', 'http://www.3dnews.ru/storage/rss/'), ('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b',
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c', 'http://www.3dnews.ru/auto/rss/'), 'http://www.3dnews.ru/display/rss/'),
('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c', 'http://www.3dnews.ru/phone/rss/'), ('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438',
('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f', 'http://www.3dnews.ru/peripheral/rss/'), 'http://www.3dnews.ru/storage/rss/'),
('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a', 'http://www.3dnews.ru/mobile/rss/'), ('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c',
('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b', 'http://www.3dnews.ru/tablets/rss/'), 'http://www.3dnews.ru/auto/rss/'),
('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430', 'http://www.3dnews.ru/multimedia/rss/'), ('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c',
'http://www.3dnews.ru/phone/rss/'),
('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f',
'http://www.3dnews.ru/peripheral/rss/'),
('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a',
'http://www.3dnews.ru/mobile/rss/'),
('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b',
'http://www.3dnews.ru/tablets/rss/'),
('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430',
'http://www.3dnews.ru/multimedia/rss/'),
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0435 \u0444\u043e\u0442\u043e \u0438 \u0432\u0438\u0434\u0435\u043e', ('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0435 \u0444\u043e\u0442\u043e \u0438 \u0432\u0438\u0434\u0435\u043e',
'http://www.3dnews.ru/digital/rss/'), 'http://www.3dnews.ru/digital/rss/'),
('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438', 'http://www.3dnews.ru/communication/rss/'), ('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438',
'http://www.3dnews.ru/communication/rss/'),
('\u0418\u0433\u0440\u044b', 'http://www.3dnews.ru/games/rss/'), ('\u0418\u0433\u0440\u044b', 'http://www.3dnews.ru/games/rss/'),
('\u041f\u0440\u043e\u0433\u0440\u0430\u043c\u043c\u043d\u043e\u0435 \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0435\u043d\u0438\u0435', ('\u041f\u0440\u043e\u0433\u0440\u0430\u043c\u043c\u043d\u043e\u0435 \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0435\u043d\u0438\u0435',
'http://www.3dnews.ru/software/rss/'), 'http://www.3dnews.ru/software/rss/'),
('Off-\u0441\u044f\u043d\u043a\u0430', 'http://www.3dnews.ru/offsyanka/rss/'), ('Off-\u0441\u044f\u043d\u043a\u0430',
('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f', 'http://www.3dnews.ru/workshop/rss/'), 'http://www.3dnews.ru/offsyanka/rss/'),
('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f',
'http://www.3dnews.ru/workshop/rss/'),
('ServerNews', 'http://servernews.ru/rss'), ('ServerNews', 'http://servernews.ru/rss'),
] ]

View File

@ -9,6 +9,7 @@ elargentino.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
class SieteDias(BasicNewsRecipe): class SieteDias(BasicNewsRecipe):
title = '7 dias' title = '7 dias'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -27,14 +28,6 @@ class SieteDias(BasicNewsRecipe):
INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html' INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})] keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})]
remove_tags = [dict(name='link')] remove_tags = [dict(name='link')]
@ -51,8 +44,10 @@ class SieteDias(BasicNewsRecipe):
del item['style'] del item['style']
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
soup.html['dir'] = self.direction soup.html['dir'] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) mlang = Tag(soup, 'meta', [
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) ("http-equiv", "Content-Language"), ("content", self.lang)])
mcharset = Tag(soup, 'meta', [
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
soup.head.insert(0, mlang) soup.head.insert(0, mlang)
soup.head.insert(1, mcharset) soup.head.insert(1, mcharset)
return soup return soup
@ -62,7 +57,8 @@ class SieteDias(BasicNewsRecipe):
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('div', attrs={'class': 'colder'}) cover_item = soup.find('div', attrs={'class': 'colder'})
if cover_item: if cover_item:
clean_url = self.image_url_processor(None,cover_item.div.img['src']) clean_url = self.image_url_processor(
None, cover_item.div.img['src'])
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
return cover_url return cover_url

View File

@ -9,6 +9,7 @@ sapteseri.ro
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class SapteSeri(BasicNewsRecipe): class SapteSeri(BasicNewsRecipe):
title = u'Sapte Seri' title = u'Sapte Seri'
__author__ = u'Silviu Cotoar\u0103' __author__ = u'Silviu Cotoar\u0103'
@ -26,17 +27,12 @@ class SapteSeri(BasicNewsRecipe):
cover_url = 'http://www.sapteseri.ro/Images/logo.jpg' cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ keep_only_tags = [
dict(name='h1', attrs={'id':'title'}) dict(name='h1', attrs={'id': 'title'}), dict(name='div', attrs={'class': 'mt10 mb10'}), dict(
, dict(name='div', attrs={'class':'mt10 mb10'}) name='div', attrs={'class': 'mb20 mt10'}), dict(name='div', attrs={'class': 'mt5 mb20'})
, dict(name='div', attrs={'class':'mb20 mt10'})
, dict(name='div', attrs={'class':'mt5 mb20'})
] ]
remove_tags = [ remove_tags = [
@ -44,7 +40,8 @@ class SapteSeri(BasicNewsRecipe):
] ]
feeds = [ feeds = [
(u'Ce se intampla azi in Bucuresti', u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/') (u'Ce se intampla azi in Bucuresti',
u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -9,6 +9,7 @@ http://www.ansa.it/
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Ansa(BasicNewsRecipe): class Ansa(BasicNewsRecipe):
__author__ = 'Gabriele Marini' __author__ = 'Gabriele Marini'
description = 'Italian News Agency' description = 'Italian News Agency'
@ -34,13 +35,11 @@ class Ansa(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class': ['path', 'header-content', 'corpo']}), keep_only_tags = [dict(name='div', attrs={'class': ['path', 'header-content', 'corpo']}),
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class': 'tools-bar'}), dict(name='div', attrs={'class': 'tools-bar'}),
dict(name='div', attrs={'id': ['rssdiv', 'blocco']}) dict(name='div', attrs={'id': ['rssdiv', 'blocco']})
] ]
feeds = [ feeds = [
(u'HomePage', u'http://www.ansa.it/web/ansait_web_rss_homepage.xml'), (u'HomePage', u'http://www.ansa.it/web/ansait_web_rss_homepage.xml'),
(u'Top New', u'http://www.ansa.it/web/notizie/rubriche/topnews/topnews_rss.xml'), (u'Top New', u'http://www.ansa.it/web/notizie/rubriche/topnews/topnews_rss.xml'),
@ -50,9 +49,11 @@ class Ansa(BasicNewsRecipe):
(u'Politica', u'http://www.ansa.it/web/notizie/rubriche/politica/politica_rss.xml'), (u'Politica', u'http://www.ansa.it/web/notizie/rubriche/politica/politica_rss.xml'),
(u'Scienze', u'http://www.ansa.it/web/notizie/rubriche/scienza/scienza_rss.xml'), (u'Scienze', u'http://www.ansa.it/web/notizie/rubriche/scienza/scienza_rss.xml'),
(u'Cinema', u'http://www.ansa.it/web/notizie/rubriche/cinema/cinema_rss.xml'), (u'Cinema', u'http://www.ansa.it/web/notizie/rubriche/cinema/cinema_rss.xml'),
(u'Tecnologia e Internet', u'http://www.ansa.it/web/notizie/rubriche/tecnologia/tecnologia_rss.xml'), (u'Tecnologia e Internet',
u'http://www.ansa.it/web/notizie/rubriche/tecnologia/tecnologia_rss.xml'),
(u'Spettacolo', u'http://www.ansa.it/web/notizie/rubriche/spettacolo/spettacolo_rss.xml'), (u'Spettacolo', u'http://www.ansa.it/web/notizie/rubriche/spettacolo/spettacolo_rss.xml'),
(u'Cultura e Tendenze', u'http://www.ansa.it/web/notizie/rubriche/cultura/cultura_rss.xml'), (u'Cultura e Tendenze',
u'http://www.ansa.it/web/notizie/rubriche/cultura/cultura_rss.xml'),
(u'Sport', u'http://www.ansa.it/web/notizie/rubriche/altrisport/altrisport_rss.xml'), (u'Sport', u'http://www.ansa.it/web/notizie/rubriche/altrisport/altrisport_rss.xml'),
(u'Calcio', u'http://www.ansa.it/web/notizie/rubriche/calcio/calcio_rss.xml'), (u'Calcio', u'http://www.ansa.it/web/notizie/rubriche/calcio/calcio_rss.xml'),
(u'Lazio', u'http://www.ansa.it/web/notizie/regioni/lazio/lazio_rss.xml'), (u'Lazio', u'http://www.ansa.it/web/notizie/regioni/lazio/lazio_rss.xml'),

View File

@ -1,6 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class DrawAndCook(BasicNewsRecipe): class DrawAndCook(BasicNewsRecipe):
title = 'DrawAndCook' title = 'DrawAndCook'
__author__ = 'Starson17' __author__ = 'Starson17'
@ -38,18 +39,20 @@ class DrawAndCook(BasicNewsRecipe):
date = '' date = ''
current_articles = [] current_articles = []
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'}) featured_major_slider = soup.find(
recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)}) name='div', attrs={'id': 'featured_major_slider'})
recipes = featured_major_slider.findAll(
'li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
for recipe in recipes: for recipe in recipes:
page_url = self.INDEX + recipe.a['href'] page_url = self.INDEX + recipe.a['href']
print 'page_url is: ', page_url print 'page_url is: ', page_url
title = recipe.find('strong').string title = recipe.find('strong').string
print 'title is: ', title print 'title is: ', title
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date}) current_articles.append(
{'title': title, 'url': page_url, 'description': '', 'date': date})
return current_articles return current_articles
keep_only_tags = [dict(name='h1', attrs={'id':'page_title'}) keep_only_tags = [dict(name='h1', attrs={'id': 'page_title'}), dict(name='section', attrs={'id': 'artwork'})
,dict(name='section', attrs={'id':'artwork'})
] ]
remove_tags = [dict(name='article', attrs={'id': ['recipe_actions', 'metadata']}) remove_tags = [dict(name='article', attrs={'id': ['recipe_actions', 'metadata']})
@ -62,4 +65,3 @@ class DrawAndCook(BasicNewsRecipe):
p{font-family:Arial,Helvetica,sans-serif;font-size:small;} p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;} body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
''' '''

View File

@ -2,7 +2,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class ZiveRecipe(BasicNewsRecipe): class ZiveRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Abelturd' __author__ = 'Abelturd'
@ -25,7 +24,8 @@ class ZiveRecipe(BasicNewsRecipe):
cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif' cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
feeds = [] feeds = []
feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx')) feeds.append((u'V\u0161etky \u010dl\xe1nky',
u'http://www.zive.sk/rss/sc-47/default.aspx'))
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE), (re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE),
@ -33,13 +33,11 @@ class ZiveRecipe(BasicNewsRecipe):
] ]
remove_tags = [] remove_tags = []
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),] keep_only_tags = [dict(name='h1'), dict(name='span', attrs={
'class': 'arlist-data-info-author'}), dict(name='div', attrs={'class': 'bbtext font-resizer-area'}), ]
extra_css = ''' extra_css = '''
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold} h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold} h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
''' '''

View File

@ -1,4 +1,6 @@
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe): class AdvancedUserRecipe(BasicNewsRecipe):
title = u'Aachener Nachrichten' title = u'Aachener Nachrichten'
@ -26,46 +28,86 @@ class AdvancedUserRecipe(BasicNewsRecipe):
] ]
feeds = [ feeds = [
(u'Lokales - Euregio', u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'), (u'Lokales - Euregio',
(u'Lokales - Aachen', u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'), u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
(u'Lokales - Nordkreis', u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'), (u'Lokales - Aachen',
(u'Lokales - Düren', u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'), u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
(u'Lokales - Eiffel', u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'), (u'Lokales - Nordkreis',
(u'Lokales - Eschweiler', u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'), u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
(u'Lokales - Geilenkirchen', u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'), (u'Lokales - Düren',
(u'Lokales - Heinsberg', u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'), u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
(u'Lokales - Jülich', u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'), (u'Lokales - Eiffel',
(u'Lokales - Stolberg', u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'), u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
(u'News - Politik', u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'), (u'Lokales - Eschweiler',
(u'News - Aus aller Welt', u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'), u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
(u'News - Wirtschaft', u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'), (u'Lokales - Geilenkirchen',
(u'News - Kultur', u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'), u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
(u'Lokales - Heinsberg',
u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
(u'Lokales - Jülich',
u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
(u'Lokales - Stolberg',
u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
(u'News - Politik',
u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
(u'News - Aus aller Welt',
u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
(u'News - Wirtschaft',
u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
(u'News - Kultur',
u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
(u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'), (u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'),
(u'News - Digital', u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'), (u'News - Digital',
(u'News - Wissenschaft', u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'), u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
(u'News - Hochschule', u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'), (u'News - Wissenschaft',
u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
(u'News - Hochschule',
u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
(u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'), (u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'),
(u'News - Kurioses', u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'), (u'News - Kurioses',
(u'News - Musik', u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'), u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
(u'News - Tagesthema', u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'), (u'News - Musik',
(u'News - Newsticker', u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'), u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
(u'Sport - Aktuell', u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'), (u'News - Tagesthema',
(u'Sport - Fußball', u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'), u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
(u'Sport - Bundesliga', u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'), (u'News - Newsticker',
(u'Sport - Alemannia Aachen', u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'), u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
(u'Sport - Volleyball', u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'), (u'Sport - Aktuell',
(u'Sport - Chio', u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'), u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
(u'Dossier - Kinderuni', u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'), (u'Sport - Fußball',
(u'Dossier - Karlspreis', u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'), u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
(u'Dossier - Ritterorden', u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'), (u'Sport - Bundesliga',
(u'Dossier - ZAB-Aachen', u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'), u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
(u'Dossier - Karneval', u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'), (u'Sport - Alemannia Aachen',
(u'Ratgeber - Geld', u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'), u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
(u'Ratgeber - Recht', u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'), (u'Sport - Volleyball',
(u'Ratgeber - Gesundheit', u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'), u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
(u'Ratgeber - Familie', u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'), (u'Sport - Chio',
(u'Ratgeber - Livestyle', u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'), u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
(u'Ratgeber - Reisen', u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'), (u'Dossier - Kinderuni',
(u'Ratgeber - Bauen und Wohnen', u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'), u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
(u'Ratgeber - Bildung und Beruf', u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'), (u'Dossier - Karlspreis',
u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
(u'Dossier - Ritterorden',
u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
(u'Dossier - ZAB-Aachen',
u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
(u'Dossier - Karneval',
u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
(u'Ratgeber - Geld',
u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
(u'Ratgeber - Recht',
u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
(u'Ratgeber - Gesundheit',
u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
(u'Ratgeber - Familie',
u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
(u'Ratgeber - Livestyle',
u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
(u'Ratgeber - Reisen',
u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
(u'Ratgeber - Bauen und Wohnen',
u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
(u'Ratgeber - Bildung und Beruf',
u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
] ]

View File

@ -1,6 +1,7 @@
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ABCRecipe(BasicNewsRecipe): class ABCRecipe(BasicNewsRecipe):
title = u'ABC Linuxu' title = u'ABC Linuxu'
oldest_article = 5 oldest_article = 5
@ -30,10 +31,11 @@ class ABCRecipe(BasicNewsRecipe):
dict(name='', attrs={'': ''}), dict(name='', attrs={'': ''}),
] ]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'</div>.*<p class="perex">', re.DOTALL),lambda match: '</div><p class="perex">') (re.compile(r'</div>.*<p class="perex">', re.DOTALL),
lambda match: '</div><p class="perex">')
] ]
def print_version(self, url): def print_version(self, url):
return url + '?varianta=print&noDiz' return url + '?varianta=print&noDiz'

View File

@ -6,6 +6,7 @@ abc.net.au/news
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class ABCNews(BasicNewsRecipe): class ABCNews(BasicNewsRecipe):
title = 'ABC News' title = 'ABC News'
__author__ = 'Pat Stapleton, Dean Cording' __author__ = 'Pat Stapleton, Dean Cording'
@ -16,7 +17,6 @@ class ABCNews(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = False no_stylesheets = False
#delay = 1
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
publisher = 'ABC News' publisher = 'ABC News'
@ -24,14 +24,12 @@ class ABCNews(BasicNewsRecipe):
language = 'en_AU' language = 'en_AU'
publication_type = 'newsportal' publication_type = 'newsportal'
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')] # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google) # Remove annoying map links (inline-caption class is also used for some
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')] # image captions! hence regex to match maps.google)
preprocess_regexps = [(re.compile(
r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': False
} }
keep_only_tags = [dict(attrs={'class': ['article section']})] keep_only_tags = [dict(attrs={'class': ['article section']})]
@ -52,5 +50,6 @@ class ABCNews(BasicNewsRecipe):
('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'), ('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'), ('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'), ('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'), ('Science and Technology',
'http://www.abc.net.au/news/feed/2298/rss.xml'),
] ]

View File

@ -10,6 +10,7 @@ http://www.abc.es/
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1296604369(BasicNewsRecipe): class AdvancedUserRecipe1296604369(BasicNewsRecipe):
title = u'ABC.es' title = u'ABC.es'
@ -39,19 +40,20 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
""" """
feeds = [ feeds = [
(u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml')
,(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml') (u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml'),
,(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml') (u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml'),
,(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml') (u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml'),
,(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml') (u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml'),
,(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml') (u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml'),
,(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml') (u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml'),
,(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml') (u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml'),
,(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml') (u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml'),
,(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml') (u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml'),
,(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml') (u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml'),
,(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml') (u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml'),
,(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml') (u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml'),
,(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml') (u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml'),
,(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml') (u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml'),
(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
] ]

View File

@ -6,6 +6,7 @@ abc.com.py
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ABC_py(BasicNewsRecipe): class ABC_py(BasicNewsRecipe):
title = 'ABC Color' title = 'ABC Color'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -27,25 +28,23 @@ class ABC_py(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
remove_tags = [ remove_tags = [
dict(name=['form','iframe','embed','object','link','base','table']), dict(name=['form', 'iframe', 'embed',
'object', 'link', 'base', 'table']),
dict(attrs={'class': ['es-carousel-wrapper']}), dict(attrs={'class': ['es-carousel-wrapper']}),
dict(attrs={'id': ['tools', 'article-banner-1']}) dict(attrs={'id': ['tools', 'article-banner-1']})
] ]
keep_only_tags = [dict(attrs={'id': 'article'})] keep_only_tags = [dict(attrs={'id': 'article'})]
feeds = [ feeds = [
(u'Ultimo momento', u'http://www.abc.com.py/rss.xml' )
,(u'Nacionales' , u'http://www.abc.com.py/nacionales/rss.xml' ) (u'Ultimo momento', u'http://www.abc.com.py/rss.xml'),
,(u'Mundo' , u'http://www.abc.com.py/internacionales/rss.xml') (u'Nacionales', u'http://www.abc.com.py/nacionales/rss.xml'),
,(u'Deportes' , u'http://www.abc.com.py/deportes/rss.xml' ) (u'Mundo', u'http://www.abc.com.py/internacionales/rss.xml'),
,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos/rss.xml' ) (u'Deportes', u'http://www.abc.com.py/deportes/rss.xml'),
,(u'TecnoCiencia' , u'http://www.abc.com.py/ciencia/rss.xml' ) (u'Espectaculos', u'http://www.abc.com.py/espectaculos/rss.xml'),
(u'TecnoCiencia', u'http://www.abc.com.py/ciencia/rss.xml')
] ]

View File

@ -8,6 +8,7 @@ www.accountancyage.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AccountancyAge(BasicNewsRecipe): class AccountancyAge(BasicNewsRecipe):
title = 'Accountancy Age' title = 'Accountancy Age'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -23,7 +24,8 @@ class AccountancyAge(BasicNewsRecipe):
lang = 'en' lang = 'en'
language = 'en' language = 'en'
feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')] feeds = [
(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='h1'),

View File

@ -2,6 +2,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1334868409(BasicNewsRecipe): class AdvancedUserRecipe1334868409(BasicNewsRecipe):
title = u'AÇIK BİLİM DERGİSİ' title = u'AÇIK BİLİM DERGİSİ'
description = ' Aylık çevrimiçi bilim dergisi' description = ' Aylık çevrimiçi bilim dergisi'
@ -15,13 +16,9 @@ class AdvancedUserRecipe1334868409(BasicNewsRecipe):
language = 'tr' language = 'tr'
publication_type = 'magazine ' publication_type = 'magazine '
conversion_options = { conversion_options = {
'tags' : category 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
} }
cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg' cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg' masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')] feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]

View File

@ -10,6 +10,7 @@ acrimed.org
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Acrimed(BasicNewsRecipe): class Acrimed(BasicNewsRecipe):
title = u'Acrimed' title = u'Acrimed'
__author__ = 'Gaëtan Lehmann' __author__ = 'Gaëtan Lehmann'
@ -22,7 +23,8 @@ class Acrimed(BasicNewsRecipe):
feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')] feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'), lambda m: '<title>' + m.group(1) + '</title>'), (re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'),
lambda m: '<title>' + m.group(1) + '</title>'),
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')] (re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
extra_css = """ extra_css = """

View File

@ -1,6 +1,7 @@
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ADRecipe(BasicNewsRecipe): class ADRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
@ -27,36 +28,49 @@ class ADRecipe(BasicNewsRecipe):
remove_tags = [] remove_tags = []
remove_tags.append(dict(name='div', attrs={'class': 'gen_clear'})) remove_tags.append(dict(name='div', attrs={'class': 'gen_clear'}))
remove_tags.append(dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')})) remove_tags.append(
dict(name='div', attrs={'class': re.compile(r'gen_spacer.*')}))
remove_attributes = ['style'] remove_attributes = ['style']
# feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml # feeds from
# http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
feeds = [] feeds = []
feeds.append((u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml')) feeds.append(
feeds.append((u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml')) (u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
feeds.append(
(u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml')) feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
feeds.append((u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml')) feeds.append((u'Gezondheid & Wetenschap',
u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml')) feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
feeds.append((u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml')) feeds.append((u'Nederlands Voetbal',
feeds.append((u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml')) u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
feeds.append((u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml')) feeds.append((u'Buitenlands Voetbal',
feeds.append((u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml')) u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
feeds.append((u'Champions League/Europa League',
u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
feeds.append(
(u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml')) feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
feeds.append((u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml')) feeds.append(
feeds.append((u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml')) (u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
feeds.append(
(u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml')) feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml')) feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml')) feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml')) feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
feeds.append((u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml')) feeds.append((u'Kunst & Literatuur',
u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml')) feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml')) feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml')) feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml')) feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml')) feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml')) feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
feeds.append((u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml')) feeds.append(
(u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml')) feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
extra_css = ''' extra_css = '''
@ -71,7 +85,8 @@ class ADRecipe(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
parts = url.split('/') parts = url.split('/')
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \ print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
+ parts[10] + '/' + parts[7] + '/print/' + parts[8] + '/' + parts[9] + '/' + parts[13] + parts[10] + '/' + parts[7] + '/print/' + \
parts[8] + '/' + parts[9] + '/' + parts[13]
return print_url return print_url

View File

@ -9,6 +9,7 @@ adevarul.ro
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Adevarul(BasicNewsRecipe): class Adevarul(BasicNewsRecipe):
title = u'Adev\u0103rul' title = u'Adev\u0103rul'
language = 'ro' language = 'ro'
@ -25,27 +26,14 @@ class Adevarul(BasicNewsRecipe):
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png' cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'}) keep_only_tags = [dict(name='div', attrs={'class': 'article_header'}), dict(name='div', attrs={'class': 'bb-tu first-t bb-article-body'})
,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'})
] ]
remove_tags = [ remove_tags = [
dict(name='li', attrs={'class':'author'}) dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa
,dict(name='li', attrs={'class':'date'})
,dict(name='li', attrs={'class':'comments'})
,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'})
,dict(name='div', attrs={'id':'mediatag'})
,dict(name='div', attrs={'id':'ft'})
,dict(name='div', attrs={'id':'comment_wrapper'})
] ]
remove_tags_after = [ remove_tags_after = [
@ -56,4 +44,3 @@ class Adevarul(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -10,6 +10,7 @@ http://www.adnkronos.com/
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Adnkronos(BasicNewsRecipe): class Adnkronos(BasicNewsRecipe):
__author__ = 'Gabriele Marini' __author__ = 'Gabriele Marini'
description = 'News agency' description = 'News agency'
@ -27,6 +28,7 @@ class Adnkronos(BasicNewsRecipe):
recursion = 10 recursion = 10
remove_javascript = True remove_javascript = True
def get_article_url(self, article): def get_article_url(self, article):
link = article.get('id', article.get('guid', None)) link = article.get('id', article.get('guid', None))
return link return link
@ -35,12 +37,10 @@ class Adnkronos(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class': ['breadCrumbs', 'newsTop', 'newsText']}) keep_only_tags = [dict(name='div', attrs={'class': ['breadCrumbs', 'newsTop', 'newsText']})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class': ['leogoo', 'leogoo2']}) dict(name='div', attrs={'class': ['leogoo', 'leogoo2']})
] ]
feeds = [ feeds = [
(u'Prima Pagina', u'http://rss.adnkronos.com/RSS_PrimaPagina.xml'), (u'Prima Pagina', u'http://rss.adnkronos.com/RSS_PrimaPagina.xml'),
(u'Ultima Ora', u'http://rss.adnkronos.com/RSS_Ultimora.xml'), (u'Ultima Ora', u'http://rss.adnkronos.com/RSS_Ultimora.xml'),
@ -56,4 +56,3 @@ class Adnkronos(BasicNewsRecipe):
(u'Sostenibilita', u'http://rss.adnkronos.com/RSS_Sostenibilita.xml'), (u'Sostenibilita', u'http://rss.adnkronos.com/RSS_Sostenibilita.xml'),
(u'Salute', u'http://rss.adnkronos.com/RSS_Salute.xml') (u'Salute', u'http://rss.adnkronos.com/RSS_Salute.xml')
] ]

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336986047(BasicNewsRecipe): class AdvancedUserRecipe1336986047(BasicNewsRecipe):
title = u'Ads of the World' title = u'Ads of the World'
oldest_article = 7 oldest_article = 7
@ -15,12 +16,11 @@ class AdvancedUserRecipe1336986047(BasicNewsRecipe):
] ]
remove_tags = [ remove_tags = [
dict(name='ul', attrs={'class':'links inline'}) dict(name='ul', attrs={'class': 'links inline'}), dict(name='div', attrs={'class': 'form-item'}), dict(
,dict(name='div', attrs={'class':'form-item'}) name='div', attrs={'id': ['options', 'comments']}), dict(name='ul', attrs={'id': 'nodePager'})
,dict(name='div', attrs={'id':['options', 'comments']})
,dict(name='ul', attrs={'id':'nodePager'})
] ]
reverse_article_order = True reverse_article_order = True
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png' masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')] feeds = [
(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]

View File

@ -1,8 +1,10 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Adventure_zone(BasicNewsRecipe): class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone' title = u'Adventure Zone'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' # noqa
category = 'games' category = 'games'
language = 'pl' language = 'pl'
BASEURL = 'http://www.adventure-zone.info/fusion/' BASEURL = 'http://www.adventure-zone.info/fusion/'

View File

@ -6,6 +6,7 @@ www.adventuregamers.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdventureGamers(BasicNewsRecipe): class AdventureGamers(BasicNewsRecipe):
title = u'Adventure Gamers' title = u'Adventure Gamers'
language = 'en' language = 'en'
@ -14,7 +15,6 @@ class AdventureGamers(BasicNewsRecipe):
publisher = 'Adventure Gamers' publisher = 'Adventure Gamers'
category = 'news, games, adventure, technology' category = 'news, games, adventure, technology'
oldest_article = 10 oldest_article = 10
#delay = 10
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
@ -35,17 +35,13 @@ class AdventureGamers(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'class': 'cleft_inn'})] keep_only_tags = [dict(name='div', attrs={'class': 'cleft_inn'})]
remove_tags = [ remove_tags = [
dict(name=['object','link','embed','form','iframe','meta']) dict(name=['object', 'link', 'embed', 'form', 'iframe', 'meta']), dict(name='a', attrs={
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/scoring'}) 'href': 'http://www.adventuregamers.com/about/scoring'}), dict(name='a', attrs={'href': 'http://www.adventuregamers.com/about/policies'})
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/policies'})
] ]
remove_tags_after = [dict(name='div', attrs={'class': 'bodytext'})] remove_tags_after = [dict(name='div', attrs={'class': 'bodytext'})]
remove_attributes = ['width', 'height'] remove_attributes = ['width', 'height']
@ -74,7 +70,6 @@ class AdventureGamers(BasicNewsRecipe):
pager.extract() pager.extract()
appendtag.insert(position, texttag) appendtag.insert(position, texttag)
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Aftenposten(BasicNewsRecipe): class Aftenposten(BasicNewsRecipe):
title = u'Aftenposten' title = u'Aftenposten'
__author__ = 'davotibarna' __author__ = 'davotibarna'
@ -17,4 +18,3 @@ class Aftenposten(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('#xtor=RSS-3', '?service=print') return url.replace('#xtor=RSS-3', '?service=print')

View File

@ -8,6 +8,7 @@ boljevac.blogspot.com
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AgroGerila(BasicNewsRecipe): class AgroGerila(BasicNewsRecipe):
title = 'Agro Gerila' title = 'Agro Gerila'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -19,13 +20,10 @@ class AgroGerila(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = True use_embedded_content = True
publication_type = 'blog' publication_type = 'blog'
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' # noqa
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': 'film, blog, srbija', 'publisher': 'Dry-Na-Nord', 'language': language
, 'tags' : 'film, blog, srbija'
, 'publisher': 'Dry-Na-Nord'
, 'language' : language
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -36,5 +34,3 @@ class AgroGerila(BasicNewsRecipe):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -6,6 +6,7 @@ www.aif.ru
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AIF_ru(BasicNewsRecipe): class AIF_ru(BasicNewsRecipe):
title = 'Arguments & Facts - Russian' title = 'Arguments & Facts - Russian'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -25,16 +26,12 @@ class AIF_ru(BasicNewsRecipe):
img{display: block} img{display: block}
""" """
keep_only_tags = [ keep_only_tags = [
dict(name='h1', attrs={'class':'title'}) dict(name='h1', attrs={'class': 'title'}), dict(name='div', attrs={'class': 'prew_tags'}), dict(
,dict(name='div', attrs={'class':'prew_tags'}) name='article', attrs={'class': lambda x: x and 'articl_body' in x.split()})
,dict(name='article', attrs={'class':lambda x: x and 'articl_body' in x.split()})
] ]
remove_tags = [ remove_tags = [
dict(name=['iframe','object','link','base','input','meta']) dict(name=['iframe', 'object', 'link', 'base', 'input', 'meta']), dict(name='div', attrs={'class': 'in-topic'}), dict(name='div', attrs={
,dict(name='div',attrs={'class':'in-topic'}) 'class': lambda x: x and 'related_article' in x.split()}), dict(name='div', attrs={'class': lambda x: x and 'articl_tag' in x.split()})
,dict(name='div', attrs={'class':lambda x: x and 'related_article' in x.split()})
,dict(name='div', attrs={'class':lambda x: x and 'articl_tag' in x.split()})
] ]
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')] feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AirForceTimes(BasicNewsRecipe): class AirForceTimes(BasicNewsRecipe):
title = 'Air Force Times' title = 'Air Force Times'
__author__ = 'jde' __author__ = 'jde'
@ -24,8 +25,6 @@ class AirForceTimes(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
auto_cleanup = True auto_cleanup = True
feeds = [ feeds = [
('News', 'http://www.airforcetimes.com/rss_news.php'), ('News', 'http://www.airforcetimes.com/rss_news.php'),
@ -37,7 +36,3 @@ class AirForceTimes(BasicNewsRecipe):
('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'), ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'), ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
] ]

View File

@ -6,10 +6,12 @@ __version__ = '0.1'
__date__ = '2015/01/10' __date__ = '2015/01/10'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import datetime, re import datetime
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
class AdvancedUserRecipe1282101454(BasicNewsRecipe): class AdvancedUserRecipe1282101454(BasicNewsRecipe):
now = datetime.datetime.now() now = datetime.datetime.now()
title = 'The AJC' title = 'The AJC'
@ -24,13 +26,15 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
# The AJC lists identical articles in multiple feeds; this removes them based on their URL # The AJC lists identical articles in multiple feeds; this removes them
# based on their URL
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
# And this says "Hey, AJC, different feeds should mean something!" # And this says "Hey, AJC, different feeds should mean something!"
remove_empty_feeds = True remove_empty_feeds = True
# Sets whether a feed has full articles embedded in it. The AJC feeds do not. # Sets whether a feed has full articles embedded in it. The AJC feeds do
# not.
use_embedded_content = False use_embedded_content = False
masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif' masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'
@ -39,7 +43,8 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
# articles will be dropped. # articles will be dropped.
feeds = [ feeds = [
('Breaking News', 'http://www.ajc.com/list/rss/online/ajc-auto-list-iphone-topnews/aFKq/'), ('Breaking News', 'http://www.ajc.com/list/rss/online/ajc-auto-list-iphone-topnews/aFKq/'),
('Metro and Georgia', 'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'), ('Metro and Georgia',
'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'),
('Business', 'http://www.ajc.com/feeds/categories/business/'), ('Business', 'http://www.ajc.com/feeds/categories/business/'),
('Health', 'http://www.ajc.com/feeds/categories/health/'), ('Health', 'http://www.ajc.com/feeds/categories/health/'),
# ('Braves', 'http://www.ajc.com/list/rss/sports/baseball/atlanta-braves-news/aGpN/'), # ('Braves', 'http://www.ajc.com/list/rss/sports/baseball/atlanta-braves-news/aGpN/'),
@ -52,18 +57,22 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
author_reg_exp = '^.*cm-story-author.*$' author_reg_exp = '^.*cm-story-author.*$'
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':re.compile(headline_reg_exp, re.IGNORECASE)}), dict(name='div', attrs={'class': re.compile(
headline_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class': 'cm-story-meta'}), dict(name='div', attrs={'class': 'cm-story-meta'}),
dict(name='div', attrs={'class':re.compile(author_reg_exp, re.IGNORECASE)}), dict(name='div', attrs={'class': re.compile(
author_reg_exp, re.IGNORECASE)}),
dict(name='meta', attrs={'name': 'description'}), dict(name='meta', attrs={'name': 'description'}),
dict(name='div', attrs={'class':re.compile(story_body_reg_exp, re.IGNORECASE)}), dict(name='div', attrs={'class': re.compile(
story_body_reg_exp, re.IGNORECASE)}),
] ]
premium_reg_exp = '^.*cmPremiumContent.*$' premium_reg_exp = '^.*cmPremiumContent.*$'
footer_reg_exp = '^.*cm-story-footer.*$' footer_reg_exp = '^.*cm-story-footer.*$'
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':re.compile(footer_reg_exp, re.IGNORECASE)}), dict(name='div', attrs={'class': re.compile(
footer_reg_exp, re.IGNORECASE)}),
dict(name='div', attrs={'class': 'cm-inline-related-group'}) dict(name='div', attrs={'class': 'cm-inline-related-group'})
] ]
@ -74,9 +83,11 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
.cm-story-author { display: block; font-size: 80%; font-style: italic; }' .cm-story-author { display: block; font-size: 80%; font-style: italic; }'
# I would love to remove these completely from the finished product, but I can't see how at the moment. # I would love to remove these completely from the finished product, but I can't see how at the moment.
# Returning "None" from preprocess_html(soup) as suggested in mobileread forums leads to errors. # Returning "None" from preprocess_html(soup) as suggested in mobileread
# forums leads to errors.
def preprocess_html(self, soup): def preprocess_html(self, soup):
premium = soup.find('div', attrs={'class':re.compile(self.premium_reg_exp, re.IGNORECASE)}) premium = soup.find('div', attrs={'class': re.compile(
self.premium_reg_exp, re.IGNORECASE)})
if premium: if premium:
return None return None
crosslink = soup.find('a', attrs={'class': 'cm-feed-story-more-link'}) crosslink = soup.find('a', attrs={'class': 'cm-feed-story-more-link'})
@ -101,7 +112,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}): for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}):
div.extract() div.extract()
for auth in div.findAll('a'): for auth in div.findAll('a'):
if (auth.has_key('class') and auth['class'] == 'cm-source-image'): if (auth.has_key('class') and auth['class'] == 'cm-source-image'): # noqa
continue continue
names = names + comma + auth.contents[0] names = names + comma + auth.contents[0]
comma = ', ' comma = ', '
@ -113,4 +124,3 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
meta = soup.find('div', attrs={'class': 'cm-story-meta'}) meta = soup.find('div', attrs={'class': 'cm-story-meta'})
meta_idx = meta.parent.contents.index(meta) meta_idx = meta.parent.contents.index(meta)
meta.parent.insert(meta_idx + 1, tag) meta.parent.insert(meta_idx + 1, tag)

View File

@ -6,6 +6,7 @@ ajiajin.com/blog
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AjiajinBlog(BasicNewsRecipe): class AjiajinBlog(BasicNewsRecipe):
title = u'Ajiajin blog' title = u'Ajiajin blog'
__author__ = 'Hiroshi Miura' __author__ = 'Hiroshi Miura'
@ -19,5 +20,3 @@ class AjiajinBlog(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')] feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]

View File

@ -2,6 +2,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Aksiyon (BasicNewsRecipe): class Aksiyon (BasicNewsRecipe):
title = u'Aksiyon Dergisi' title = u'Aksiyon Dergisi'
@ -10,8 +11,6 @@ class Aksiyon (BasicNewsRecipe):
oldest_article = 13 oldest_article = 13
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
publisher = 'Aksiyon' publisher = 'Aksiyon'
category = 'news, haberler,TR,gazete' category = 'news, haberler,TR,gazete'
@ -30,18 +29,24 @@ class Aksiyon (BasicNewsRecipe):
(u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'), (u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
(u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'), (u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
(u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'), (u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'), (u'ARKA PENCERE',
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
(u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'), (u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
(u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'), (u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
(u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'), (u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'), (u'KÜLTÜR & SANAT',
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
(u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'), (u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'), (u'BİLİŞİM - TEKNOLOJİ',
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
(u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'), (u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'), (u'HAYAT BİLGİSİ',
( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'), u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
(u'İŞ DÜNYASI',
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
] ]
# def print_version(self, url): # def print_version(self, url):
#return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?') # return
# url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&',
# 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')

View File

@ -7,12 +7,13 @@ akter.co.rs
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Akter(BasicNewsRecipe): class Akter(BasicNewsRecipe):
title = 'AKTER - Nedeljnik' title = 'AKTER - Nedeljnik'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'AKTER - nedeljni politicki magazin savremene Srbije' description = 'AKTER - nedeljni politicki magazin savremene Srbije'
publisher = 'Akter Media Group d.o.o.' publisher = 'Akter Media Group d.o.o.'
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics' category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics' # noqa
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@ -29,10 +30,7 @@ class Akter(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher': publisher
, 'language' : language
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -51,4 +49,3 @@ class Akter(BasicNewsRecipe):
if imgt: if imgt:
return 'http://www.akter.co.rs' + imgt['src'] return 'http://www.akter.co.rs' + imgt['src']
return None return None

View File

@ -7,12 +7,12 @@ akter.co.rs
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Akter(BasicNewsRecipe): class Akter(BasicNewsRecipe):
title = 'AKTER - Dnevnik' title = 'AKTER - Dnevnik'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'AKTER - Najnovije vesti iz Srbije' description = 'AKTER - Najnovije vesti iz Srbije'
publisher = 'Akter Media Group d.o.o.' publisher = 'Akter Media Group d.o.o.'
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@ -29,10 +29,7 @@ class Akter(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher': publisher
, 'language' : language
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
import re import re
class aktualneRecipe(BasicNewsRecipe): class aktualneRecipe(BasicNewsRecipe):
__author__ = 'bubak' __author__ = 'bubak'
title = u'aktualne.cz' title = u'aktualne.cz'
@ -34,7 +35,8 @@ class aktualneRecipe(BasicNewsRecipe):
dict(name='div', attrs={'class': 'itemcomment id0'}), dict(name='div', attrs={'class': 'itemcomment id0'}),
dict(name='div', attrs={'class': 'hlavicka'}), dict(name='div', attrs={'class': 'hlavicka'}),
dict(name='div', attrs={'class': 'hlavni-menu'}), dict(name='div', attrs={'class': 'hlavni-menu'}),
dict(name='div', attrs={'class':'top-standard-brand-obal'}), dict(name='div', attrs={
'class': 'top-standard-brand-obal'}),
dict(name='div', attrs={'class': 'breadcrumb'}), dict(name='div', attrs={'class': 'breadcrumb'}),
dict(name='div', attrs={'id': 'start-standard'}), dict(name='div', attrs={'id': 'start-standard'}),
dict(name='div', attrs={'id': 'forum'}), dict(name='div', attrs={'id': 'forum'}),
@ -50,6 +52,7 @@ class aktualneRecipe(BasicNewsRecipe):
keep_only_tags = [] keep_only_tags = []
visited_urls = {} visited_urls = {}
def get_article_url(self, article): def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article) url = BasicNewsRecipe.get_article_url(self, article)
if url in self.visited_urls: if url in self.visited_urls:

View File

@ -6,6 +6,7 @@ ahram.org.eg
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class AlAhram(BasicNewsRecipe): class AlAhram(BasicNewsRecipe):
title = u'Al-Ahram (الأهرام)' title = u'Al-Ahram (الأهرام)'
__author__ = 'Hassan Williamson' __author__ = 'Hassan Williamson'
@ -16,13 +17,12 @@ class AlAhram(BasicNewsRecipe):
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
#delay = 1
use_embedded_content = False use_embedded_content = False
publisher = 'Al-Ahram' publisher = 'Al-Ahram'
category = 'News' category = 'News'
publication_type = 'newsportal' publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } ' extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } ' # noqa
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class': ['bbcolright']}) dict(name='div', attrs={'class': ['bbcolright']})
@ -41,26 +41,36 @@ class AlAhram(BasicNewsRecipe):
feeds = [ feeds = [
(u'الأولى', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'), (u'الأولى', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
(u'الصفحة الثانية', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'), (u'الصفحة الثانية',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
(u'مصر', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'), (u'مصر', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
(u'المشهد السياسي', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'), (u'المشهد السياسي',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
(u'المحافظات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'), (u'المحافظات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
(u'الوطن العربي', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'), (u'الوطن العربي',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
(u'العالم', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'), (u'العالم', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
(u'تقارير المراسلين', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'), (u'تقارير المراسلين',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
(u'تحقيقات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'), (u'تحقيقات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
(u'قضايا واراء', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'), (u'قضايا واراء',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
(u'اقتصاد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'), (u'اقتصاد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
(u'رياضة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'), (u'رياضة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
(u'حوادث', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'), (u'حوادث', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
(u'دنيا الثقافة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'), (u'دنيا الثقافة',
(u'المراة والطفل', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'), 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
(u'المراة والطفل',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
(u'يوم جديد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'), (u'يوم جديد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
(u'الكتاب', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'), (u'الكتاب', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
(u'الاعمدة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'), (u'الاعمدة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
(u'أراء حرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'), (u'أراء حرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
(u'ملفات الاهرام', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'), (u'ملفات الاهرام',
(u'بريد الاهرام', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'), 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
(u'برلمان الثورة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'), (u'بريد الاهرام',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
(u'برلمان الثورة',
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
(u'الاخيرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'), (u'الاخيرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
] ]

View File

@ -6,9 +6,11 @@ english.aljazeera.net
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def has_cls(x): def has_cls(x):
return dict(attrs={'class': lambda cls: cls and x in cls.split()}) return dict(attrs={'class': lambda cls: cls and x in cls.split()})
class AlJazeera(BasicNewsRecipe): class AlJazeera(BasicNewsRecipe):
title = 'Al Jazeera in English' title = 'Al Jazeera in English'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -35,11 +37,14 @@ class AlJazeera(BasicNewsRecipe):
] ]
remove_tags = [ remove_tags = [
has_cls('MoreOnTheStory'), has_cls('ArticleBottomToolbar'), dict(smtitle="ShowMore"), has_cls('MoreOnTheStory'), has_cls(
dict(name=['object','link','table','meta','base','iframe','embed']), 'ArticleBottomToolbar'), dict(smtitle="ShowMore"),
dict(name=['object', 'link', 'table',
'meta', 'base', 'iframe', 'embed']),
] ]
feeds = [(u'Al Jazeera English', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989')] feeds = [(u'Al Jazeera English',
u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989')]
def get_article_url(self, article): def get_article_url(self, article):
artlurl = article.get('link', None) artlurl = article.get('link', None)

View File

@ -6,6 +6,7 @@ almasryalyoum.com
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class AlMasryAlyoum(BasicNewsRecipe): class AlMasryAlyoum(BasicNewsRecipe):
title = u'Al-Masry Alyoum (المصري اليوم)' title = u'Al-Masry Alyoum (المصري اليوم)'
__author__ = 'Hassan Williamson' __author__ = 'Hassan Williamson'
@ -16,13 +17,12 @@ class AlMasryAlyoum(BasicNewsRecipe):
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
#delay = 1
use_embedded_content = False use_embedded_content = False
publisher = 'Al-Masry Alyoum' publisher = 'Al-Masry Alyoum'
category = 'News' category = 'News'
publication_type = 'newsportal' publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class': ['article']}) dict(name='div', attrs={'class': ['article']})
@ -47,7 +47,8 @@ class AlMasryAlyoum(BasicNewsRecipe):
feeds = [ feeds = [
(u'أخر الأخبار', 'http://www.almasryalyoum.com/rss/RssFeeds'), (u'أخر الأخبار', 'http://www.almasryalyoum.com/rss/RssFeeds'),
(u'الصفحة الرئيسية', 'http://www.almasryalyoum.com/rss/RssFeeds?homePage=true'), (u'الصفحة الرئيسية',
'http://www.almasryalyoum.com/rss/RssFeeds?homePage=true'),
(u'أقلام وآراء', 'http://www.almasryalyoum.com/rss/RssFeeds?typeId=2&homePage=false'), (u'أقلام وآراء', 'http://www.almasryalyoum.com/rss/RssFeeds?typeId=2&homePage=false'),
(u'أخبار مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=3'), (u'أخبار مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=3'),
(u'رياضة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=8'), (u'رياضة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=8'),
@ -56,23 +57,28 @@ class AlMasryAlyoum(BasicNewsRecipe):
(u'فنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=10'), (u'فنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=10'),
(u'منوعاتنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=12'), (u'منوعاتنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=12'),
(u'ثقافة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=6'), (u'ثقافة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=6'),
(u'علوم وتكنولوجيا', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=9'), (u'علوم وتكنولوجيا',
(u'تحقيقات وحوارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=5'), 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=9'),
(u'تحقيقات وحوارات',
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=5'),
(u'المرأة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=69'), (u'المرأة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=69'),
(u'رأي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=2'), (u'رأي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=2'),
(u'وسط الناس', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=13'), (u'وسط الناس', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=13'),
(u'مركز المصري للدراسات و المعلومات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=56'), (u'مركز المصري للدراسات و المعلومات',
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=56'),
(u'مطبخ', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=81'), (u'مطبخ', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=81'),
(u'برلمان مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=78'), (u'برلمان مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=78'),
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=54'), (u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=54'),
(u'تحليلات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=60'), (u'تحليلات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=60'),
(u'عروض نقدية', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=61'), (u'عروض نقدية', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=61'),
(u'دراسات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=62'), (u'دراسات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=62'),
(u'كتاب المصري اليوم', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=65'), (u'كتاب المصري اليوم',
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=65'),
(u'فعاليات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=66'), (u'فعاليات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=66'),
(u'إسلامي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=75'), (u'إسلامي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=75'),
(u'مطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=76'), (u'مطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=76'),
(u'مسلسلاتيطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=77'), (u'مسلسلاتيطبخي',
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=77'),
(u'رمضان زمان', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=82'), (u'رمضان زمان', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=82'),
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=85'), (u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=85'),
(u'سيارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=86'), (u'سيارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=86'),

View File

@ -5,10 +5,14 @@ __copyright__ = '2014, spswerling'
''' '''
http://www.al-monitor.com/ http://www.al-monitor.com/
''' '''
import string, inspect, datetime, re import string
import inspect
import datetime
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
class AlMonitor(BasicNewsRecipe): class AlMonitor(BasicNewsRecipe):
title = u'Al Monitor' title = u'Al Monitor'
__author__ = u'spswerling' __author__ = u'spswerling'

View File

@ -3,6 +3,7 @@ __copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AlbertMohlersBlog(BasicNewsRecipe): class AlbertMohlersBlog(BasicNewsRecipe):
title = u'Albert Mohler\'s Blog' title = u'Albert Mohler\'s Blog'
__author__ = 'Peter Grungi' __author__ = 'Peter Grungi'
@ -15,4 +16,5 @@ class AlbertMohlersBlog(BasicNewsRecipe):
language = 'en' language = 'en'
author = 'Albert Mohler' author = 'Albert Mohler'
feeds = [(u'Albert Mohler\'s Blog', u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')] feeds = [(u'Albert Mohler\'s Blog',
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]

View File

@ -2,16 +2,16 @@ __license__ = 'GPL v3'
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AlejaKomiksu(BasicNewsRecipe): class AlejaKomiksu(BasicNewsRecipe):
title = u'Aleja Komiksu' title = u'Aleja Komiksu'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Serwis poświęcony komiksom. Najnowsze wieści, recenzje, artykuły, wywiady, galerie, komiksy online, konkursy, linki, baza komiksów online.' description = u'Serwis poświęcony komiksom. Najnowsze wieści, recenzje, artykuły, wywiady, galerie, komiksy online, konkursy, linki, baza komiksów online.'
category = 'comics' category = 'comics'
#publication_type = ''
language = 'pl' language = 'pl'
#encoding = ''
extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}' extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}'
preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')] preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>',
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png' cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png' masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
use_embedded_content = False use_embedded_content = False
@ -24,8 +24,6 @@ class AlejaKomiksu(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs={'class': 'cont_tresc'})] keep_only_tags = [dict(attrs={'class': 'cont_tresc'})]
#remove_tags = [dict()]
#remove_tags_before = dict()
feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')] feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')]

View File

@ -8,6 +8,7 @@ www.alo.rs
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Alo_Novine(BasicNewsRecipe): class Alo_Novine(BasicNewsRecipe):
title = 'Alo!' title = 'Alo!'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -30,10 +31,7 @@ class Alo_Novine(BasicNewsRecipe):
img{margin-bottom: 0.8em} """ img{margin-bottom: 0.8em} """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher': publisher
, 'language' : language
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@ -42,12 +40,13 @@ class Alo_Novine(BasicNewsRecipe):
remove_attributes = ['height', 'width'] remove_attributes = ['height', 'width']
feeds = [ feeds = [
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
,(u'Politika' , u'http://www.alo.rs/rss/politika') (u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti'),
,(u'Vesti' , u'http://www.alo.rs/rss/vesti') (u'Politika', u'http://www.alo.rs/rss/politika'),
,(u'Sport' , u'http://www.alo.rs/rss/sport') (u'Vesti', u'http://www.alo.rs/rss/vesti'),
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi') (u'Sport', u'http://www.alo.rs/rss/sport'),
,(u'Saveti' , u'http://www.alo.rs/rss/saveti') (u'Ljudi', u'http://www.alo.rs/rss/ljudi'),
(u'Saveti', u'http://www.alo.rs/rss/saveti')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -62,4 +61,3 @@ class Alo_Novine(BasicNewsRecipe):
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):
return url.replace('alo.rs//', 'alo.rs/') return url.replace('alo.rs//', 'alo.rs/')

View File

@ -6,6 +6,7 @@ aoh.dk
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class aoh_dk(BasicNewsRecipe): class aoh_dk(BasicNewsRecipe):
title = 'Alt om Herning' title = 'Alt om Herning'
__author__ = 'Rasmus Lauritsen' __author__ = 'Rasmus Lauritsen'
@ -25,17 +26,13 @@ class aoh_dk(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
feeds = [(u'All news', u'http://aoh.dk/rss.xml')] feeds = [(u'All news', u'http://aoh.dk/rss.xml')]
keep_only_tags = [ keep_only_tags = [
dict(name='h1') dict(name='h1'), dict(name='span', attrs={'class': ['frontpage_body']})
,dict(name='span', attrs={'class':['frontpage_body']})
] ]
remove_tags = [ remove_tags = [

View File

@ -1,6 +1,7 @@
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Alternet(BasicNewsRecipe): class Alternet(BasicNewsRecipe):
title = u'Alternet' title = u'Alternet'
__author__ = 'rty' __author__ = 'rty'

View File

@ -12,7 +12,9 @@ Change Log:
from calibre import (__appname__, force_unicode, strftime) from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf from calibre.utils.date import now as nowf
import os, datetime, re import os
import datetime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -21,6 +23,7 @@ from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
class AppleDaily(BasicNewsRecipe): class AppleDaily(BasicNewsRecipe):
title = u'AM730' title = u'AM730'
__author__ = 'Eddie Lau' __author__ = 'Eddie Lau'
@ -37,7 +40,7 @@ class AppleDaily(BasicNewsRecipe):
description = 'http://www.am730.com.hk' description = 'http://www.am730.com.hk'
category = 'Chinese, News, Hong Kong' category = 'Chinese, News, Hong Kong'
masthead_url = 'http://www.am730.com.hk/images/logo.jpg' masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
keep_only_tags = [dict(name='h2', attrs={'class': 'printTopic'}), keep_only_tags = [dict(name='h2', attrs={'class': 'printTopic'}),
dict(name='div', attrs={'id': 'article_content'}), dict(name='div', attrs={'id': 'article_content'}),
dict(name='div', attrs={'id': 'slider'})] dict(name='div', attrs={'id': 'slider'})]
@ -50,31 +53,31 @@ class AppleDaily(BasicNewsRecipe):
return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24) return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24)
def get_fetchdate(self): def get_fetchdate(self):
if __Date__ <> '': if __Date__ != '':
return __Date__ return __Date__
else: else:
return self.get_dtlocal().strftime("%Y%m%d") return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self): def get_fetchformatteddate(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8] return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
else: else:
return self.get_dtlocal().strftime("%Y-%m-%d") return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self): def get_fetchyear(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[0:4] return __Date__[0:4]
else: else:
return self.get_dtlocal().strftime("%Y") return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self): def get_fetchmonth(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[4:6] return __Date__[4:6]
else: else:
return self.get_dtlocal().strftime("%m") return self.get_dtlocal().strftime("%m")
def get_fetchday(self): def get_fetchday(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[6:8] return __Date__[6:8]
else: else:
return self.get_dtlocal().strftime("%d") return self.get_dtlocal().strftime("%d")
@ -85,7 +88,9 @@ class AppleDaily(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('http://www.am730.com.hk') soup = self.index_to_soup('http://www.am730.com.hk')
cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False) cover = 'http://www.am730.com.hk/' + \
soup.find(attrs={'id': 'mini_news_img'}).find(
'img').get('src', False)
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
try: try:
br.open(cover) br.open(cover)
@ -123,7 +128,8 @@ class AppleDaily(BasicNewsRecipe):
mi.publisher = __appname__ mi.publisher = __appname__
mi.author_sort = __appname__ mi.author_sort = __appname__
if self.publication_type: if self.publication_type:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() mi.publication_type = 'periodical:' + \
self.publication_type + ':' + self.short_title()
mi.timestamp = nowf() mi.timestamp = nowf()
article_titles, aseen = [], set() article_titles, aseen = [], set()
for f in feeds: for f in feeds:
@ -142,9 +148,9 @@ class AppleDaily(BasicNewsRecipe):
if language is not None: if language is not None:
mi.language = language mi.language = language
# This one affects the pub date shown in kindle title # This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well # now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(
self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf') opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx') ncx_path = os.path.join(dir, 'index.ncx')
@ -153,12 +159,14 @@ class AppleDaily(BasicNewsRecipe):
mp = getattr(self, 'masthead_path', None) mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK): if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) ref = Guide.Reference(os.path.basename(
self.masthead_path), os.getcwdu())
ref.type = 'masthead' ref.type = 'masthead'
ref.title = 'Masthead Image' ref.title = 'Masthead Image'
opf.guide.append(ref) opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] manifest = [os.path.join(dir, 'feed_%d' % i)
for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html')) manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx')) manifest.append(os.path.join(dir, 'index.ncx'))
@ -189,7 +197,6 @@ class AppleDaily(BasicNewsRecipe):
self.play_order_counter = 0 self.play_order_counter = 0
self.play_order_map = {} self.play_order_map = {}
def feed_index(num, parent): def feed_index(num, parent):
f = feeds[num] f = feeds[num]
for j, a in enumerate(f): for j, a in enumerate(f):
@ -210,10 +217,12 @@ class AppleDaily(BasicNewsRecipe):
self.play_order_counter += 1 self.play_order_counter += 1
po = self.play_order_counter po = self.play_order_counter
parent.add_item('%sindex.html' % adir, None, parent.add_item('%sindex.html' % adir, None,
a.title if a.title else _('Untitled Article'), a.title if a.title else _(
'Untitled Article'),
play_order=po, author=auth, play_order=po, author=auth,
description=desc, toc_thumbnail=tt) description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) last = os.path.join(
self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
for sp in a.sub_pages: for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp]) prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):] relp = sp[len(prefix):]
@ -226,12 +235,14 @@ class AppleDaily(BasicNewsRecipe):
soup = BeautifulSoup(src) soup = BeautifulSoup(src)
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None:
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) prefix = '/'.join('..'for i in range(2 *
len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f), templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed, not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix, a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar) center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') elem = BeautifulSoup(templ.render(
doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem) body.insert(len(body.contents), elem)
with open(last, 'wb') as fi: with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8')) fi.write(unicode(soup).encode('utf-8'))
@ -265,5 +276,3 @@ class AppleDaily(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file) opf.render(opf_file, ncx_file)

View File

@ -6,6 +6,7 @@ ambito.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Ambito(BasicNewsRecipe): class Ambito(BasicNewsRecipe):
title = 'Ambito.com' title = 'Ambito.com'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -26,26 +27,25 @@ class Ambito(BasicNewsRecipe):
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698} .t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
keep_only_tags = [dict(attrs={'id':['tituloDespliegue','imgDesp','textoDespliegue']})] keep_only_tags = [
remove_tags = [dict(name=['object','link','embed','iframe','meta','link'])] dict(attrs={'id': ['tituloDespliegue', 'imgDesp', 'textoDespliegue']})]
remove_tags = [
dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link'])]
feeds = [ feeds = [
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' ) (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp'),
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' ) (u'Economia', u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa'),
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General') (u'Politica', u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica'),
,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' ) (u'Informacion General', u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General'),
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' ) (u'Campo', u'http://www.ambito.com/rss/noticias.asp?S=Agro'),
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' ) (u'Internacionales', u'http://www.ambito.com/rss/noticias.asp?S=Internacionales'),
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' ) (u'Deportes', u'http://www.ambito.com/rss/noticias.asp?S=Deportes'),
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' ) (u'Espectaculos', u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos'),
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' ) (u'Tecnologia', u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa'),
(u'Ambito Nacional', u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional')
] ]

View File

@ -8,6 +8,7 @@ import time
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Ambito_Financiero(BasicNewsRecipe): class Ambito_Financiero(BasicNewsRecipe):
title = 'Ambito Financiero' title = 'Ambito Financiero'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -31,14 +32,12 @@ class Ambito_Financiero(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'align': 'justify'})] keep_only_tags = [dict(name='div', attrs={'align': 'justify'})]
remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])] remove_tags = [dict(name=['object', 'link', 'embed',
'iframe', 'meta', 'link', 'table', 'img'])]
remove_attributes = ['align'] remove_attributes = ['align']
def get_browser(self): def get_browser(self):
@ -79,9 +78,6 @@ class Ambito_Financiero(BasicNewsRecipe):
if url not in checker: if url not in checker:
checker.append(url) checker.append(url)
articles.append({ articles.append({
'title' :title 'title': title, 'date': date, 'url': url, 'description': u''
,'date' :date
,'url' :url
,'description':u''
}) })
return [(self.title, articles)] return [(self.title, articles)]

View File

@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.cleantext import clean_xml_chars from calibre.utils.cleantext import clean_xml_chars
from lxml import etree from lxml import etree
class AmericanThinker(BasicNewsRecipe): class AmericanThinker(BasicNewsRecipe):
title = u'American Thinker' title = u'American Thinker'
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans." description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
@ -24,18 +25,14 @@ class AmericanThinker(BasicNewsRecipe):
auto_cleanup = True auto_cleanup = True
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
} }
def preprocess_raw_html(self, raw, url): def preprocess_raw_html(self, raw, url):
root = html5lib.parse( root = html5lib.parse(
clean_xml_chars(raw), treebuilder='lxml', clean_xml_chars(raw), treebuilder='lxml',
namespaceHTMLElements=False) namespaceHTMLElements=False)
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
x.getparent().remove(x) x.getparent().remove(x)
return etree.tostring(root, encoding=unicode) return etree.tostring(root, encoding=unicode)

View File

@ -7,6 +7,7 @@ spectator.org
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from css_selectors import Select from css_selectors import Select
class TheAmericanSpectator(BasicNewsRecipe): class TheAmericanSpectator(BasicNewsRecipe):
title = 'The American Spectator' title = 'The American Spectator'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
@ -20,7 +21,8 @@ class TheAmericanSpectator(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
def parse_index(self): def parse_index(self):
root = self.index_to_soup('http://spectator.org/issues/current', as_tree=True) root = self.index_to_soup(
'http://spectator.org/issues/current', as_tree=True)
select = Select(root) select = Select(root)
main = tuple(select('div#block-system-main'))[0] main = tuple(select('div#block-system-main'))[0]
feeds = [] feeds = []
@ -43,7 +45,8 @@ class TheAmericanSpectator(BasicNewsRecipe):
for x in select('div.views-field-field-short-summary', li): for x in select('div.views-field-field-short-summary', li):
desc = self.tag_to_string(x) desc = self.tag_to_string(x)
break break
articles.append({'title':title, 'url':url, 'description':desc}) articles.append(
{'title': title, 'url': url, 'description': desc})
self.log('\t', title, 'at', url) self.log('\t', title, 'at', url)
feeds.append((section_title, articles)) feeds.append((section_title, articles))
return feeds return feeds

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AnDrumaMor(BasicNewsRecipe): class AnDrumaMor(BasicNewsRecipe):
title = u'An Druma M\xf3r' title = u'An Druma M\xf3r'
__author__ = "David O'Callaghan" __author__ = "David O'Callaghan"
@ -8,5 +9,5 @@ class AnDrumaMor(BasicNewsRecipe):
language = 'ga' language = 'ga'
use_embedded_content = True use_embedded_content = True
feeds = [(u'Nuacht Laeth\xfail', u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr')] feeds = [(u'Nuacht Laeth\xfail',
u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr')]

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278347258(BasicNewsRecipe): class AdvancedUserRecipe1278347258(BasicNewsRecipe):
title = u'Anchorage Daily News' title = u'Anchorage Daily News'
__author__ = 'rty' __author__ = 'rty'
@ -7,7 +8,6 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
auto_cleanup = True auto_cleanup = True
feeds = [(u'Alaska News', u'http://www.adn.com/rss-feeds/feed/all'), feeds = [(u'Alaska News', u'http://www.adn.com/rss-feeds/feed/all'),
(u'Politics', u'http://www.adn.com/rss-feeds/feed/politics'), (u'Politics', u'http://www.adn.com/rss-feeds/feed/politics'),
] ]
@ -26,13 +26,3 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
conversion_options = {'linearize_tables': True} conversion_options = {'linearize_tables': True}
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif' masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
#keep_only_tags = [
#dict(name='div', attrs={'class':'left_col story_mainbar'}),
#]
#remove_tags = [
#dict(name='div', attrs={'class':'story_tools'}),
#dict(name='p', attrs={'class':'ad_label'}),
#]
#remove_tags_after = [
#dict(name='div', attrs={'class':'advertisement'}),
#]

View File

@ -1,6 +1,7 @@
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Android_com_pl(BasicNewsRecipe): class Android_com_pl(BasicNewsRecipe):
title = u'Android.com.pl' title = u'Android.com.pl'
__author__ = 'fenuks' __author__ = 'fenuks'
@ -11,5 +12,6 @@ class Android_com_pl(BasicNewsRecipe):
cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png' cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')] preprocess_regexps = [
(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
feeds = [(u'Android', u'http://android.com.pl/feed/')] feeds = [(u'Android', u'http://android.com.pl/feed/')]

View File

@ -3,6 +3,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1290663986(BasicNewsRecipe): class AdvancedUserRecipe1290663986(BasicNewsRecipe):
title = u'Animal Pol\u00EDtico' title = u'Animal Pol\u00EDtico'
publisher = u'Animal Pol\u00EDtico' publisher = u'Animal Pol\u00EDtico'
@ -14,8 +15,6 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'es_MX' language = 'es_MX'
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
remove_tags_before = dict(name='div', id='main') remove_tags_before = dict(name='div', id='main')
remove_tags = [dict(name='div', attrs={'class': 'fb-like-button'})] remove_tags = [dict(name='div', attrs={'class': 'fb-like-button'})]
keep_only_tags = [dict(name='h1', attrs={'class': 'entry-title'}), keep_only_tags = [dict(name='h1', attrs={'class': 'entry-title'}),
@ -25,7 +24,8 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def generic_parse(self, soup): def generic_parse(self, soup):
articles = [] articles = []
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'): # soup.findAll('li', 'hentry'):
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
article_url = entry.a['href'] + '?print=yes' article_url = entry.a['href'] + '?print=yes'
article_title = entry.find('h3', 'entry-title') article_title = entry.find('h3', 'entry-title')
article_title = self.tag_to_string(article_title) article_title = self.tag_to_string(article_title)
@ -48,13 +48,14 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def plumaje_parse(self, soup): def plumaje_parse(self, soup):
articles = [] articles = []
blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) # noqa
for entry in blogs_soup.findAll('li'): for entry in blogs_soup.findAll('li'):
article_title = entry.p article_title = entry.p
article_url = article_title.a['href'] + '?print=yes' article_url = article_title.a['href'] + '?print=yes'
article_date = article_title.nextSibling article_date = article_title.nextSibling
article_title = self.tag_to_string(article_title) article_title = self.tag_to_string(article_title)
article_date = self.tag_to_string(article_date).replace(u'Last Updated: ', '') article_date = self.tag_to_string(
article_date).replace(u'Last Updated: ', '')
article_desc = self.tag_to_string(entry.find('h4')) article_desc = self.tag_to_string(entry.find('h4'))
# print 'Article:',article_title, article_date,article_url # print 'Article:',article_title, article_date,article_url
@ -67,13 +68,15 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def boca_parse(self, soup): def boca_parse(self, soup):
articles = [] articles = []
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'): # soup.findAll('li', 'hentry'):
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
article_title = entry.find('h2', 'entry-title') article_title = entry.find('h2', 'entry-title')
article_url = article_title.a['href'] + '?print=yes' article_url = article_title.a['href'] + '?print=yes'
article_title = self.tag_to_string(article_title) article_title = self.tag_to_string(article_title)
article_date = entry.find('span', 'entry-date') article_date = entry.find('span', 'entry-date')
article_date = self.tag_to_string(article_date) article_date = self.tag_to_string(article_date)
article_desc = self.tag_to_string(entry.find('div', 'entry-content')) article_desc = self.tag_to_string(
entry.find('div', 'entry-content'))
# print 'Article:',article_title, article_date,article_url # print 'Article:',article_title, article_date,article_url
# print entry['class'] # print entry['class']
@ -88,16 +91,14 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
return articles return articles
def parse_index(self): def parse_index(self):
gobierno_soup = self.index_to_soup(self.INDEX + 'gobierno/') gobierno_soup = self.index_to_soup(self.INDEX + 'gobierno/')
congreso_soup = self.index_to_soup(self.INDEX + 'congreso/') congreso_soup = self.index_to_soup(self.INDEX + 'congreso/')
seguridad_soup = self.index_to_soup(self.INDEX + 'seguridad/') seguridad_soup = self.index_to_soup(self.INDEX + 'seguridad/')
comunidad_soup = self.index_to_soup(self.INDEX + 'comunidad/') comunidad_soup = self.index_to_soup(self.INDEX + 'comunidad/')
plumaje_soup = self.index_to_soup(self.INDEX + 'plumaje/') plumaje_soup = self.index_to_soup(self.INDEX + 'plumaje/')
la_boca_del_lobo_soup = self.index_to_soup(self.INDEX+'category/la-boca-del-lobo/') la_boca_del_lobo_soup = self.index_to_soup(
self.INDEX + 'category/la-boca-del-lobo/')
gobierno_articles = self.generic_parse(gobierno_soup) gobierno_articles = self.generic_parse(gobierno_soup)
congreso_articles = self.generic_parse(congreso_soup) congreso_articles = self.generic_parse(congreso_soup)
@ -106,6 +107,5 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
plumaje_articles = self.plumaje_parse(plumaje_soup) plumaje_articles = self.plumaje_parse(plumaje_soup)
la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup) la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)
return [(u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles), return [(u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
(u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ] (u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ]

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AntywebRecipe(BasicNewsRecipe): class AntywebRecipe(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
__license__ = 'GPL v3' __license__ = 'GPL v3'
@ -19,7 +20,8 @@ class AntywebRecipe(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript = True remove_javascript = True
simultaneous_downloads = 10 simultaneous_downloads = 10
ignore_duplicate_articles = {'title', 'url'} # zignoruj zduplikowane artykuły o takich samych tytułach LUB adresach # zignoruj zduplikowane artykuły o takich samych tytułach LUB adresach
ignore_duplicate_articles = {'title', 'url'}
scale_news_images = True scale_news_images = True
conversion_options = {'tags': u'news, aplikacje mobilne, Android, iOS, Windows Phone ', conversion_options = {'tags': u'news, aplikacje mobilne, Android, iOS, Windows Phone ',
'smarten_punctuation': True, 'smarten_punctuation': True,
@ -42,6 +44,7 @@ class AntywebRecipe(BasicNewsRecipe):
(u'Google', 'http://feeds.feedburner.com/AntywebGoogle'), (u'Google', 'http://feeds.feedburner.com/AntywebGoogle'),
(u'Microsoft', 'http://feeds.feedburner.com/AntywebMicrosoft') (u'Microsoft', 'http://feeds.feedburner.com/AntywebMicrosoft')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for alink in soup.findAll('a'): for alink in soup.findAll('a'):
if alink.string is not None: if alink.string is not None:

View File

@ -12,7 +12,8 @@ class AssociatedPress(BasicNewsRecipe):
conversion_options = { conversion_options = {
'linearize_tables': True 'linearize_tables': True
} }
keep_only_tags = {'name':'table', 'attrs':{'class':lambda x: x and 'ap-story-table' in x.split()}} keep_only_tags = {'name': 'table', 'attrs': {
'class': lambda x: x and 'ap-story-table' in x.split()}}
remove_tags = [ remove_tags = [
{'class': ['ap-mediabox-table']}, {'class': ['ap-mediabox-table']},
{'name': 'img', 'src': lambda x: x and '//analytics.' in x}, {'name': 'img', 'src': lambda x: x and '//analytics.' in x},
@ -20,7 +21,8 @@ class AssociatedPress(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
feeds = [] feeds = []
fronts = ('HOME', 'US', 'WORLD', 'BUSINESS', 'TECHNOLOGY', 'SPORTS', 'ENTERTAINMENT', 'HEALTH', 'SCIENCE', 'POLITICS') fronts = ('HOME', 'US', 'WORLD', 'BUSINESS', 'TECHNOLOGY',
'SPORTS', 'ENTERTAINMENT', 'HEALTH', 'SCIENCE', 'POLITICS')
for front in fronts: for front in fronts:
feeds.append([front.capitalize(), self.parse_section(front)]) feeds.append([front.capitalize(), self.parse_section(front)])
feeds[0][0] = 'Top Stories' feeds[0][0] = 'Top Stories'
@ -28,7 +30,8 @@ class AssociatedPress(BasicNewsRecipe):
def parse_section(self, front): def parse_section(self, front):
self.log('Processing section:', front) self.log('Processing section:', front)
soup = self.index_to_soup('http://hosted.ap.org/dynamic/fronts/%s?SITE=AP' % front) soup = self.index_to_soup(
'http://hosted.ap.org/dynamic/fronts/%s?SITE=AP' % front)
articles = [] articles = []
for x in soup.findAll('p', attrs={'class': ['ap-newsbriefitem-p', 'ap-topheadlineitem-p']}): for x in soup.findAll('p', attrs={'class': ['ap-newsbriefitem-p', 'ap-topheadlineitem-p']}):

View File

@ -10,6 +10,7 @@ http://www.apcom.NET/
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Apcom(BasicNewsRecipe): class Apcom(BasicNewsRecipe):
__author__ = 'Marini Gabriele' __author__ = 'Marini Gabriele'
description = 'Italian daily newspaper' description = 'Italian daily newspaper'
@ -35,8 +36,6 @@ class Apcom(BasicNewsRecipe):
dict(name='div', attrs={'id': 'ag_center'}) dict(name='div', attrs={'id': 'ag_center'})
] ]
feeds = [ feeds = [
(u'Globale', u'http://www.apcom.net/rss/globale.xml '), (u'Globale', u'http://www.apcom.net/rss/globale.xml '),
(u'Politica', u'http://www.apcom.net/rss/politica.xml'), (u'Politica', u'http://www.apcom.net/rss/politica.xml'),

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class APOD(BasicNewsRecipe): class APOD(BasicNewsRecipe):
title = u'Astronomy Picture of the Day' title = u'Astronomy Picture of the Day'
__author__ = 'Starson17' __author__ = 'Starson17'
@ -23,6 +24,7 @@ class APOD(BasicNewsRecipe):
p{font-family:Arial,Helvetica,sans-serif;font-size:small;} p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;} body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
''' '''
def postprocess_html(self, soup, first_fetch): def postprocess_html(self, soup, first_fetch):
center_tags = soup.findAll(['center']) center_tags = soup.findAll(['center'])
p_tags = soup.findAll(['p']) p_tags = soup.findAll(['p'])
@ -35,4 +37,3 @@ class APOD(BasicNewsRecipe):
for tag in last2_p: for tag in last2_p:
tag.extract() tag.extract()
return soup return soup

View File

@ -9,6 +9,7 @@ appfunds.blogspot.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class app_funds(BasicNewsRecipe): class app_funds(BasicNewsRecipe):
title = u'APP Funds' title = u'APP Funds'
__author__ = 'teepel <teepel44@gmail.com>' __author__ = 'teepel <teepel44@gmail.com>'

View File

@ -6,7 +6,9 @@ __Date__ = ''
from calibre import (__appname__, force_unicode, strftime) from calibre import (__appname__, force_unicode, strftime)
from calibre.utils.date import now as nowf from calibre.utils.date import now as nowf
import os, datetime, re import os
import datetime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
@ -15,6 +17,7 @@ from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
class AppleDaily(BasicNewsRecipe): class AppleDaily(BasicNewsRecipe):
title = u'蘋果日報 (香港)' title = u'蘋果日報 (香港)'
__author__ = 'Eddie Lau' __author__ = 'Eddie Lau'
@ -32,7 +35,7 @@ class AppleDaily(BasicNewsRecipe):
category = 'Chinese, News, Hong Kong' category = 'Chinese, News, Hong Kong'
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png' masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
keep_only_tags = [dict(name='div', attrs={'id': 'content-article'})] keep_only_tags = [dict(name='div', attrs={'id': 'content-article'})]
remove_tags = [dict(name='div', attrs={'class': 'prev-next-btn'}), remove_tags = [dict(name='div', attrs={'class': 'prev-next-btn'}),
dict(name='p', attrs={'class': 'next'})] dict(name='p', attrs={'class': 'next'})]
@ -43,31 +46,31 @@ class AppleDaily(BasicNewsRecipe):
return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24) return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24)
def get_fetchdate(self): def get_fetchdate(self):
if __Date__ <> '': if __Date__ != '':
return __Date__ return __Date__
else: else:
return self.get_dtlocal().strftime("%Y%m%d") return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self): def get_fetchformatteddate(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8] return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
else: else:
return self.get_dtlocal().strftime("%Y-%m-%d") return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self): def get_fetchyear(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[0:4] return __Date__[0:4]
else: else:
return self.get_dtlocal().strftime("%Y") return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self): def get_fetchmonth(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[4:6] return __Date__[4:6]
else: else:
return self.get_dtlocal().strftime("%m") return self.get_dtlocal().strftime("%m")
def get_fetchday(self): def get_fetchday(self):
if __Date__ <> '': if __Date__ != '':
return __Date__[6:8] return __Date__[6:8]
else: else:
return self.get_dtlocal().strftime("%d") return self.get_dtlocal().strftime("%d")
@ -117,7 +120,8 @@ class AppleDaily(BasicNewsRecipe):
a = li.find('a', href=True) a = li.find('a', href=True)
title = li.find('p', text=True).strip() title = li.find('p', text=True).strip()
if a is not None: if a is not None:
current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)}) current_articles.append(
{'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)})
pass pass
return current_articles return current_articles
@ -131,7 +135,8 @@ class AppleDaily(BasicNewsRecipe):
mi.publisher = __appname__ mi.publisher = __appname__
mi.author_sort = __appname__ mi.author_sort = __appname__
if self.publication_type: if self.publication_type:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() mi.publication_type = 'periodical:' + \
self.publication_type + ':' + self.short_title()
mi.timestamp = nowf() mi.timestamp = nowf()
article_titles, aseen = [], set() article_titles, aseen = [], set()
for f in feeds: for f in feeds:
@ -152,7 +157,8 @@ class AppleDaily(BasicNewsRecipe):
# This one affects the pub date shown in kindle title # This one affects the pub date shown in kindle title
# mi.pubdate = nowf() # mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well # now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(
self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf') opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx') ncx_path = os.path.join(dir, 'index.ncx')
@ -161,12 +167,14 @@ class AppleDaily(BasicNewsRecipe):
mp = getattr(self, 'masthead_path', None) mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK): if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) ref = Guide.Reference(os.path.basename(
self.masthead_path), os.getcwdu())
ref.type = 'masthead' ref.type = 'masthead'
ref.title = 'Masthead Image' ref.title = 'Masthead Image'
opf.guide.append(ref) opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] manifest = [os.path.join(dir, 'feed_%d' % i)
for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html')) manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx')) manifest.append(os.path.join(dir, 'index.ncx'))
@ -197,7 +205,6 @@ class AppleDaily(BasicNewsRecipe):
self.play_order_counter = 0 self.play_order_counter = 0
self.play_order_map = {} self.play_order_map = {}
def feed_index(num, parent): def feed_index(num, parent):
f = feeds[num] f = feeds[num]
for j, a in enumerate(f): for j, a in enumerate(f):
@ -218,10 +225,12 @@ class AppleDaily(BasicNewsRecipe):
self.play_order_counter += 1 self.play_order_counter += 1
po = self.play_order_counter po = self.play_order_counter
parent.add_item('%sindex.html' % adir, None, parent.add_item('%sindex.html' % adir, None,
a.title if a.title else _('Untitled Article'), a.title if a.title else _(
'Untitled Article'),
play_order=po, author=auth, play_order=po, author=auth,
description=desc, toc_thumbnail=tt) description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) last = os.path.join(
self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
for sp in a.sub_pages: for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp]) prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):] relp = sp[len(prefix):]
@ -234,12 +243,14 @@ class AppleDaily(BasicNewsRecipe):
soup = BeautifulSoup(src) soup = BeautifulSoup(src)
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None:
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) prefix = '/'.join('..'for i in range(2 *
len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f), templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed, not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix, a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar) center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') elem = BeautifulSoup(templ.render(
doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem) body.insert(len(body.contents), elem)
with open(last, 'wb') as fi: with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8')) fi.write(unicode(soup).encode('utf-8'))
@ -273,5 +284,3 @@ class AppleDaily(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file) opf.render(opf_file, ncx_file)

View File

@ -105,5 +105,6 @@ class AppledailyTW(BasicNewsRecipe):
def preprocess_raw_html(self, raw_html, url): def preprocess_raw_html(self, raw_html, url):
raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html) raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
raw_html = re.sub(ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html) raw_html = re.sub(
ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
return raw_html return raw_html

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1395137685(BasicNewsRecipe): class BasicUserRecipe1395137685(BasicNewsRecipe):
title = u'Applefobia' title = u'Applefobia'
__author__ = 'koliberek' __author__ = 'koliberek'

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AmericanProspect(BasicNewsRecipe): class AmericanProspect(BasicNewsRecipe):
title = u'American Prospect' title = u'American Prospect'
__author__ = u'Michael Heinz, a.peter' __author__ = u'Michael Heinz, a.peter'
@ -12,11 +13,8 @@ class AmericanProspect(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
#keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
#remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
auto_cleanup = True auto_cleanup = True
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')] feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]

View File

@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1335656316(BasicNewsRecipe): class AdvancedUserRecipe1335656316(BasicNewsRecipe):
title = u'AraInfo.org' title = u'AraInfo.org'
__author__ = 'Ruben Pollan' __author__ = 'Ruben Pollan'
@ -16,4 +17,9 @@ class AdvancedUserRecipe1335656316(BasicNewsRecipe):
auto_cleanup = True auto_cleanup = True
cover_url = u'http://arainfo.org/wordpress/wp-content/uploads/2011/10/logo-web_alta.jpg' cover_url = u'http://arainfo.org/wordpress/wp-content/uploads/2011/10/logo-web_alta.jpg'
feeds = [(u'Movimientos', u'http://arainfo.org/category/movimientos/feed/'), (u'Econom\xeda', u'http://arainfo.org/category/economia/feed/'), (u'Ecolog\xeda', u'http://arainfo.org/category/ecologia/feed/'), (u'Culturas', u'http://arainfo.org/category/culturas/feed/'), (u'Altavoz', u'http://arainfo.org/category/altavoz/feed/')] feeds = [
(u'Movimientos', u'http://arainfo.org/category/movimientos/feed/'),
(u'Econom\xeda', u'http://arainfo.org/category/economia/feed/'),
(u'Ecolog\xeda', u'http://arainfo.org/category/ecologia/feed/'),
(u'Culturas', u'http://arainfo.org/category/culturas/feed/'),
(u'Altavoz', u'http://arainfo.org/category/altavoz/feed/')]

View File

@ -6,12 +6,12 @@ www.arabianbusiness.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Arabian_Business(BasicNewsRecipe): class Arabian_Business(BasicNewsRecipe):
title = 'Arabian Business' title = 'Arabian Business'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.' description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.' # noqa
publisher = 'Arabian Business Publishing Ltd.' publisher = 'Arabian Business Publishing Ltd.'
category = 'ArabianBusiness.com,Arab Business News,Middle East Business News,Middle East Business,Arab Media News,Industry Events,Middle East Industry News,Arab Business Industry,Dubai Business News,Financial News,UAE Business News,Middle East Press Releases,Gulf News,Arab News,GCC Business News,Banking Finance,Media Marketing,Construction,Oil Gas,Retail,Transportation,Travel Hospitality,Photos,Videos,Life Style,Fashion,United Arab Emirates,UAE,Dubai,Sharjah,Abu Dhabi,Qatar,KSA,Saudi Arabia,Bahrain,Kuwait,Oman,Europe,South Asia,America,Asia,news'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
@ -29,48 +29,45 @@ class Arabian_Business(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
remove_tags_before = dict(attrs={'id': 'article-title'}) remove_tags_before = dict(attrs={'id': 'article-title'})
remove_tags = [ remove_tags = [
dict(name=['meta','link','base','iframe','embed','object']) dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object']), dict(
,dict(attrs={'class':'printfooter'}) attrs={'class': 'printfooter'})
] ]
remove_attributes = ['lang'] remove_attributes = ['lang']
feeds = [ feeds = [
(u'Africa' , u'http://www.arabianbusiness.com/world/Africa/?service=rss' )
,(u'Americas' , u'http://www.arabianbusiness.com/world/americas/?service=rss' ) (u'Africa', u'http://www.arabianbusiness.com/world/Africa/?service=rss'),
,(u'Asia Pacific' , u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss' ) (u'Americas', u'http://www.arabianbusiness.com/world/americas/?service=rss'),
,(u'Europe' , u'http://www.arabianbusiness.com/world/europe/?service=rss' ) (u'Asia Pacific', u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss'),
,(u'Middle East' , u'http://www.arabianbusiness.com/world/middle-east/?service=rss' ) (u'Europe', u'http://www.arabianbusiness.com/world/europe/?service=rss'),
,(u'South Asia' , u'http://www.arabianbusiness.com/world/south-asia/?service=rss' ) (u'Middle East', u'http://www.arabianbusiness.com/world/middle-east/?service=rss'),
,(u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss' ) (u'South Asia', u'http://www.arabianbusiness.com/world/south-asia/?service=rss'),
,(u'Construction' , u'http://www.arabianbusiness.com/industries/construction/?service=rss' ) (u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss'),
,(u'Education' , u'http://www.arabianbusiness.com/industries/education/?service=rss' ) (u'Construction', u'http://www.arabianbusiness.com/industries/construction/?service=rss'),
,(u'Energy' , u'http://www.arabianbusiness.com/industries/energy/?service=rss' ) (u'Education', u'http://www.arabianbusiness.com/industries/education/?service=rss'),
,(u'Healthcare' , u'http://www.arabianbusiness.com/industries/healthcare/?service=rss' ) (u'Energy', u'http://www.arabianbusiness.com/industries/energy/?service=rss'),
,(u'Media' , u'http://www.arabianbusiness.com/industries/media/?service=rss' ) (u'Healthcare', u'http://www.arabianbusiness.com/industries/healthcare/?service=rss'),
,(u'Real Estate' , u'http://www.arabianbusiness.com/industries/real-estate/?service=rss' ) (u'Media', u'http://www.arabianbusiness.com/industries/media/?service=rss'),
,(u'Retail' , u'http://www.arabianbusiness.com/industries/retail/?service=rss' ) (u'Real Estate', u'http://www.arabianbusiness.com/industries/real-estate/?service=rss'),
,(u'Technology' , u'http://www.arabianbusiness.com/industries/technology/?service=rss' ) (u'Retail', u'http://www.arabianbusiness.com/industries/retail/?service=rss'),
,(u'Transport' , u'http://www.arabianbusiness.com/industries/transport/?service=rss' ) (u'Technology', u'http://www.arabianbusiness.com/industries/technology/?service=rss'),
,(u'Travel' , u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss') (u'Transport', u'http://www.arabianbusiness.com/industries/transport/?service=rss'),
,(u'Equities' , u'http://www.arabianbusiness.com/markets/equities/?service=rss' ) (u'Travel', u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss'),
,(u'Commodities' , u'http://www.arabianbusiness.com/markets/commodities/?service=rss' ) (u'Equities', u'http://www.arabianbusiness.com/markets/equities/?service=rss'),
,(u'Currencies' , u'http://www.arabianbusiness.com/markets/currencies/?service=rss' ) (u'Commodities', u'http://www.arabianbusiness.com/markets/commodities/?service=rss'),
,(u'Market Data' , u'http://www.arabianbusiness.com/markets/market-data/?service=rss' ) (u'Currencies', u'http://www.arabianbusiness.com/markets/currencies/?service=rss'),
,(u'Comment' , u'http://www.arabianbusiness.com/opinion/comment/?service=rss' ) (u'Market Data', u'http://www.arabianbusiness.com/markets/market-data/?service=rss'),
,(u'Think Tank' , u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss' ) (u'Comment', u'http://www.arabianbusiness.com/opinion/comment/?service=rss'),
,(u'Arts' , u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss' ) (u'Think Tank', u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss'),
,(u'Cars' , u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss' ) (u'Arts', u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss'),
,(u'Food' , u'http://www.arabianbusiness.com/lifestyle/food/?service=rss' ) (u'Cars', u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss'),
,(u'Sport' , u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss' ) (u'Food', u'http://www.arabianbusiness.com/lifestyle/food/?service=rss'),
(u'Sport', u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss')
] ]
def print_version(self, url): def print_version(self, url):

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Arbetaren_SE(BasicNewsRecipe): class Arbetaren_SE(BasicNewsRecipe):
title = u'Arbetaren' title = u'Arbetaren'
__author__ = 'Joakim Lindskog' __author__ = 'Joakim Lindskog'
@ -15,10 +16,7 @@ class Arbetaren_SE(BasicNewsRecipe):
language = 'sv' language = 'sv'
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'id': 'article'})] keep_only_tags = [dict(name='div', attrs={'id': 'article'})]

View File

@ -3,6 +3,7 @@
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
import re import re
class Arcadia_BBS(BasicNewsRecipe): class Arcadia_BBS(BasicNewsRecipe):
title = u'Arcadia' title = u'Arcadia'
__author__ = 'Masahiro Hasegawa' __author__ = 'Masahiro Hasegawa'
@ -25,11 +26,8 @@ class Arcadia_BBS(BasicNewsRecipe):
for s in sec[:-2]: for s in sec[:-2]:
s_result.append(dict(title=s.string, s_result.append(dict(title=s.string,
url="http://www.mai-net.net" + s['href'], url="http://www.mai-net.net" + s['href'],
date=s.parent.parent.parent.findAll('td')[3].string[:-6], date=s.parent.parent.parent.findAll('td')[
3].string[:-6],
description='', content='')) description='', content=''))
result.append((s_result[0]['title'], s_result)) result.append((s_result[0]['title'], s_result))
return result return result

View File

@ -10,6 +10,7 @@ import os
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
class Arcamax(BasicNewsRecipe): class Arcamax(BasicNewsRecipe):
title = 'Arcamax' title = 'Arcamax'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
@ -21,14 +22,12 @@ class Arcamax(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg' cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg'
# ###### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ######## # ###### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ##
num_comics_to_get = 7 num_comics_to_get = 7
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED
# STRIPS
conversion_options = {'linearize_tables' : True conversion_options = {'linearize_tables': True, 'comment': description, 'tags': category, 'language': language
, 'comment' : description
, 'tags' : category
, 'language' : language
} }
keep_only_tags = [ keep_only_tags = [
@ -93,18 +92,22 @@ class Arcamax(BasicNewsRecipe):
num -= 1 num -= 1
raw = self.index_to_soup(url, raw=True) raw = self.index_to_soup(url, raw=True)
self.panel_counter += 1 self.panel_counter += 1
path = os.path.join(self.panel_tdir, '%d.html' % self.panel_counter) path = os.path.join(self.panel_tdir, '%d.html' %
self.panel_counter)
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.write(raw) f.write(raw)
soup = self.index_to_soup(raw) soup = self.index_to_soup(raw)
a = soup.find(name='a', attrs={'class': ['prev']}) a = soup.find(name='a', attrs={'class': ['prev']})
prev_page_url = 'http://www.arcamax.com' + a['href'] prev_page_url = 'http://www.arcamax.com' + a['href']
title = self.tag_to_string(soup.find('title')).partition('|')[0].strip() title = self.tag_to_string(
soup.find('title')).partition('|')[0].strip()
if 'for' not in title.split(): if 'for' not in title.split():
title = title + ' for today' title = title + ' for today'
date = self.tag_to_string(soup.find(name='span', attrs={'class':['cur']})) date = self.tag_to_string(
soup.find(name='span', attrs={'class': ['cur']}))
self.log('\tFound:', title, 'at:', url) self.log('\tFound:', title, 'at:', url)
current_articles.append({'title': title, 'url':'file://' + path , 'description':'', 'date': date}) current_articles.append(
{'title': title, 'url': 'file://' + path, 'description': '', 'date': date})
if self.test and len(current_articles) >= self.test[1]: if self.test and len(current_articles) >= self.test[1]:
break break
url = prev_page_url url = prev_page_url

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Archeowiesci(BasicNewsRecipe): class Archeowiesci(BasicNewsRecipe):
title = u'Archeowieści' title = u'Archeowieści'
__author__ = 'fenuks' __author__ = 'fenuks'
@ -11,7 +12,8 @@ class Archeowiesci(BasicNewsRecipe):
needs_subscription = 'optional' needs_subscription = 'optional'
max_articles_per_feed = 100 max_articles_per_feed = 100
auto_cleanup = True auto_cleanup = True
remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})] remove_tags = [
dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']})]
feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')] feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
def parse_feeds(self): def parse_feeds(self):

View File

@ -10,6 +10,7 @@ import time
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class ArgNoticias(BasicNewsRecipe): class ArgNoticias(BasicNewsRecipe):
title = 'ARG Noticias' title = 'ARG Noticias'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -28,27 +29,27 @@ class ArgNoticias(BasicNewsRecipe):
extra_css = '' extra_css = ''
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher': publisher
, 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'class':['itemHeader','itemBody','itemAuthorBlock']})] keep_only_tags = [
dict(name='div', attrs={'class': ['itemHeader', 'itemBody', 'itemAuthorBlock']})]
remove_tags = [ remove_tags = [
dict(name=['object', 'link', 'base', 'iframe']), dict(name=['object', 'link', 'base', 'iframe']),
dict(name='div', attrs={'class':['b2jsocial_parent','itemSocialSharing']}) dict(name='div', attrs={
'class': ['b2jsocial_parent', 'itemSocialSharing']})
] ]
feeds = [ feeds = [
(u'Politica' , u'http://www.argnoticias.com/index.php/politica' )
,(u'Economia' , u'http://www.argnoticias.com/index.php/economia' ) (u'Politica', u'http://www.argnoticias.com/index.php/politica'),
,(u'Sociedad' , u'http://www.argnoticias.com/index.php/sociedad' ) (u'Economia', u'http://www.argnoticias.com/index.php/economia'),
,(u'Mundo' , u'http://www.argnoticias.com/index.php/mundo' ) (u'Sociedad', u'http://www.argnoticias.com/index.php/sociedad'),
,(u'Deportes' , u'http://www.argnoticias.com/index.php/deportes' ) (u'Mundo', u'http://www.argnoticias.com/index.php/mundo'),
,(u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos') (u'Deportes', u'http://www.argnoticias.com/index.php/deportes'),
,(u'Tendencias' , u'http://www.argnoticias.com/index.php/tendencias' ) (u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos'),
(u'Tendencias', u'http://www.argnoticias.com/index.php/tendencias')
] ]
def parse_index(self): def parse_index(self):
@ -57,7 +58,8 @@ class ArgNoticias(BasicNewsRecipe):
checker = [] checker = []
for feedobj in lfeeds: for feedobj in lfeeds:
feedtitle, feedurl = feedobj feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) self.report_progress(0, _('Fetching feed') + ' %s...' %
(feedtitle if feedtitle else feedurl))
articles = [] articles = []
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class': 'Nota'}): for item in soup.findAll('div', attrs={'class': 'Nota'}):
@ -70,27 +72,23 @@ class ArgNoticias(BasicNewsRecipe):
if url not in checker: if url not in checker:
checker.append(url) checker.append(url)
articles.append({ articles.append({
'title' :title 'title': title, 'date': date, 'url': url, 'description': description
,'date' :date
,'url' :url
,'description':description
}) })
for item in soup.findAll('li'): for item in soup.findAll('li'):
atag = item.find('a', attrs={'class': 'moduleItemTitle'}) atag = item.find('a', attrs={'class': 'moduleItemTitle'})
if atag: if atag:
ptag = item.find('div', attrs={'class':'moduleItemIntrotext'}) ptag = item.find(
'div', attrs={'class': 'moduleItemIntrotext'})
url = self.INDEX + atag['href'] url = self.INDEX + atag['href']
title = self.tag_to_string(atag) title = self.tag_to_string(atag)
description = self.tag_to_string(ptag) description = self.tag_to_string(ptag)
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime()) date = strftime(
"%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
if url not in checker: if url not in checker:
checker.append(url) checker.append(url)
articles.append({ articles.append({
'title' :title 'title': title, 'date': date, 'url': url, 'description': description
,'date' :date
,'url' :url
,'description':description
}) })
totalfeeds.append((feedtitle, articles)) totalfeeds.append((feedtitle, articles))
return totalfeeds return totalfeeds

View File

@ -5,11 +5,12 @@ azrepublic.com
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1307301031(BasicNewsRecipe): class AdvancedUserRecipe1307301031(BasicNewsRecipe):
title = u'AZRepublic' title = u'AZRepublic'
__author__ = 'Jim Olo' __author__ = 'Jim Olo'
language = 'en' language = 'en'
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years" description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years" # noqa
publisher = 'AZRepublic/AZCentral' publisher = 'AZRepublic/AZCentral'
masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif' masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg' cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
@ -21,30 +22,42 @@ class AdvancedUserRecipe1307301031(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }' # extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa
remove_attributes = ['width', 'height', 'h2', 'subHeadline', 'style'] remove_attributes = ['width', 'height', 'h2', 'subHeadline', 'style']
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}), dict(name='div', attrs={
dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}), 'id': ['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}), dict(name='div', attrs={
'id': ['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
dict(name='div', attrs={
'id': ['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
dict(name='div', attrs={'id': ['nav', 'mp', 'subnav', 'jobsDrop']}), dict(name='div', attrs={'id': ['nav', 'mp', 'subnav', 'jobsDrop']}),
dict(name='h6', attrs={'class': ['section-header']}), dict(name='h6', attrs={'class': ['section-header']}),
dict(name='a', attrs={'href': ['#comments']}), dict(name='a', attrs={'href': ['#comments']}),
dict(name='div', attrs={'class':['articletools clearfix', 'floatRight']}), dict(name='div', attrs={
dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}), 'class': ['articletools clearfix', 'floatRight']}),
dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}), dict(name='div', attrs={
dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}), 'id': ['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}), dict(name='div', attrs={
'id': ['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
dict(name='div', attrs={
'id': ['blogsHed', 'blog_comments', 'blogByline', 'blogTopics']}),
dict(name='div', attrs={
'id': ['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
dict(name='div', attrs={'id': ['ttdHeader', 'ttdTimeWeather']}), dict(name='div', attrs={'id': ['ttdHeader', 'ttdTimeWeather']}),
dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}), dict(name='div', attrs={
dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}), 'id': ['membersRightMain', 'deals-header-wrap']}),
dict(name='div', attrs={
'id': ['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
dict(name='h1', attrs={'id': ['SEOtext']}), dict(name='h1', attrs={'id': ['SEOtext']}),
dict(name='table', attrs={'class': ['ap-mediabox-table']}), dict(name='table', attrs={'class': ['ap-mediabox-table']}),
dict(name='p', attrs={'class': ['ap_para']}), dict(name='p', attrs={'class': ['ap_para']}),
dict(name='span', attrs={'class': ['source-org vcard', 'org fn']}), dict(name='span', attrs={'class': ['source-org vcard', 'org fn']}),
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}), dict(name='a', attrs={
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}), 'href': ['http://hosted2.ap.org/APDEFAULT/privacy']}),
dict(name='a', attrs={
'href': ['http://hosted2.ap.org/APDEFAULT/terms']}),
dict(name='div', attrs={'id': ['onespot_nextclick']}), dict(name='div', attrs={'id': ['onespot_nextclick']}),
] ]
@ -62,7 +75,3 @@ class AdvancedUserRecipe1307301031(BasicNewsRecipe):
(u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'), (u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646') (u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
] ]

View File

@ -1,4 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ArmyTimes(BasicNewsRecipe): class ArmyTimes(BasicNewsRecipe):
title = 'Army Times' title = 'Army Times'
__author__ = 'jde' __author__ = 'jde'
@ -23,8 +25,6 @@ class ArmyTimes(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
auto_cleanup = True auto_cleanup = True
feeds = [ feeds = [
('News', 'http://www.armytimes.com/rss_news.php'), ('News', 'http://www.armytimes.com/rss_news.php'),
@ -37,6 +37,3 @@ class ArmyTimes(BasicNewsRecipe):
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'), ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
] ]

View File

@ -7,6 +7,7 @@ __description__ = 'Get some fresh news from Arrêt sur images'
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Asi(BasicNewsRecipe): class Asi(BasicNewsRecipe):
title = 'Arrêt sur images' title = 'Arrêt sur images'
@ -34,7 +35,8 @@ class Asi(BasicNewsRecipe):
conversion_options = {'smarten_punctuation': True} conversion_options = {'smarten_punctuation': True}
remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'), dict(name='div', attrs={'class':'bloc-chroniqueur-2'}), dict(id='footercontainer')] remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'),
dict(name='div', attrs={'class': 'bloc-chroniqueur-2'}), dict(id='footercontainer')]
def print_version(self, url):
    """Return the printable variant of an article URL.

    Every occurrence of ``contenu.php`` in ``url`` is swapped for
    ``contenu-imprimable.php``; a URL without that component is returned
    unchanged.
    """
    printable_url = url.replace('contenu.php', 'contenu-imprimable.php')
    return printable_url
@ -51,4 +53,3 @@ class Asi(BasicNewsRecipe):
br['password'] = self.password br['password'] = self.password
br.submit() br.submit()
return br return br

View File

@ -7,6 +7,7 @@ arstechnica.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
class ArsTechnica(BasicNewsRecipe): class ArsTechnica(BasicNewsRecipe):
title = u'Ars Technica' title = u'Ars Technica'
language = 'en' language = 'en'
@ -31,40 +32,32 @@ class ArsTechnica(BasicNewsRecipe):
''' '''
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ keep_only_tags = [
dict(attrs={'class':'standalone'}) dict(attrs={'class': 'standalone'}), dict(attrs={'id': 'article-guts'})
,dict(attrs={'id':'article-guts'})
] ]
remove_tags = [ remove_tags = [
dict(name=['object','link','embed','iframe','meta']) dict(name=['object', 'link', 'embed', 'iframe', 'meta']), dict(attrs={'class': 'corner-info'}), dict(attrs={
,dict(attrs={'class':'corner-info'}) 'id': 'article-footer-wrap'}), dict(attrs={'class': 'article-expander'}), dict(name='nav', attrs={'class': 'subheading'})
,dict(attrs={'id': 'article-footer-wrap'})
,dict(attrs={'class': 'article-expander'})
,dict(name='nav',attrs={'class': 'subheading'})
] ]
remove_attributes = ['lang'] remove_attributes = ['lang']
feeds = [ feeds = [
(u'Ars Features (All our long-form feature articles)', u'http://feeds.arstechnica.com/arstechnica/features'),
(u'Ars Features (All our long-form feature articles)' , u'http://feeds.arstechnica.com/arstechnica/features') (u'Technology Lab (Information Technology)', u'http://feeds.arstechnica.com/arstechnica/technology-lab'),
, (u'Technology Lab (Information Technology)' , u'http://feeds.arstechnica.com/arstechnica/technology-lab') (u'Gear & Gadgets', u'http://feeds.arstechnica.com/arstechnica/gadgets'),
,(u'Gear & Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets') (u'Ministry of Innovation (Business of Technology)', u'http://feeds.arstechnica.com/arstechnica/business'),
,(u'Ministry of Innovation (Business of Technology)' , u'http://feeds.arstechnica.com/arstechnica/business') (u'Risk Assessment (Security & Hacktivism)', u'http://feeds.arstechnica.com/arstechnica/security'),
,(u'Risk Assessment (Security & Hacktivism)' , u'http://feeds.arstechnica.com/arstechnica/security') (u'Law & Disorder (Civilizations & Discontents)', u'http://feeds.arstechnica.com/arstechnica/tech-policy'),
,(u'Law & Disorder (Civilizations & Discontents)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy') (u'Infinite Loop (Apple Ecosystem)', u'http://feeds.arstechnica.com/arstechnica/apple'),
,(u'Infinite Loop (Apple Ecosystem)' , u'http://feeds.arstechnica.com/arstechnica/apple') (u'Opposable Thumbs (Gaming & Entertainment)', u'http://feeds.arstechnica.com/arstechnica/gaming'),
,(u'Opposable Thumbs (Gaming & Entertainment)' , u'http://feeds.arstechnica.com/arstechnica/gaming') (u'Scientific Method (Science & Exploration)', u'http://feeds.arstechnica.com/arstechnica/science'),
,(u'Scientific Method (Science & Exploration)' , u'http://feeds.arstechnica.com/arstechnica/science') (u'Multiverse (Exploratoins & Meditations on Sci-Fi)', u'http://feeds.arstechnica.com/arstechnica/multiverse'),
,(u'Multiverse (Exploratoins & Meditations on Sci-Fi)' , u'http://feeds.arstechnica.com/arstechnica/multiverse') (u'Cars Technica (All Things Automotive)', u'http://feeds.arstechnica.com/arstechnica/cars'),
,(u'Cars Technica (All Things Automotive)' , u'http://feeds.arstechnica.com/arstechnica/cars') (u'Staff Blogs (From the Minds of Ars)', u'http://feeds.arstechnica.com/arstechnica/staff-blogs')
,(u'Staff Blogs (From the Minds of Ars)' , u'http://feeds.arstechnica.com/arstechnica/staff-blogs')
] ]
def append_page(self, soup, appendtag, position): def append_page(self, soup, appendtag, position):

View File

@ -1,20 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HindustanTimes(BasicNewsRecipe):
    """Recipe settings for the Spanish-language 'Asco de vida' feed."""

    # Identification
    title = u'Asco de vida'
    __author__ = 'Krittika Goyal'
    language = 'es'

    # Download limits
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # Content handling: fetch the linked pages, drop site stylesheets and
    # keep only the story container.
    use_embedded_content = False
    no_stylesheets = True
    keep_only_tags = dict(name='div', attrs={'class': 'box story'})

    feeds = [
        ('News', 'http://feeds2.feedburner.com/AscoDeVida'),
    ]

View File

@ -8,6 +8,7 @@ asiaone.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AsiaOne(BasicNewsRecipe): class AsiaOne(BasicNewsRecipe):
title = u'AsiaOne' title = u'AsiaOne'
oldest_article = 2 oldest_article = 2
@ -20,7 +21,8 @@ class AsiaOne(BasicNewsRecipe):
remove_tags = [dict(name='span', attrs={'class': 'footer'})] remove_tags = [dict(name='span', attrs={'class': 'footer'})]
keep_only_tags = [ keep_only_tags = [
dict(name='h1', attrs={'class': 'headline'}), dict(name='h1', attrs={'class': 'headline'}),
dict(name='div', attrs={'class':['article-content','person-info row']}) dict(name='div', attrs={
'class': ['article-content', 'person-info row']})
] ]
feeds = [ feeds = [

View File

@ -7,10 +7,11 @@ www.asianreviewofbooks.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AsianReviewOfBooks(BasicNewsRecipe): class AsianReviewOfBooks(BasicNewsRecipe):
title = 'The Asian Review of Books' title = 'The Asian Review of Books'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa
publisher = 'The Asian Review of Books' publisher = 'The Asian Review of Books'
category = 'literature, books, reviews, Asia' category = 'literature, books, reviews, Asia'
oldest_article = 30 oldest_article = 30
@ -31,13 +32,9 @@ class AsianReviewOfBooks(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
remove_tags = [dict(name=['object', 'script', 'iframe', 'embed'])] remove_tags = [dict(name=['object', 'script', 'iframe', 'embed'])]
remove_attributes = ['style', 'onclick'] remove_attributes = ['style', 'onclick']
feeds = [(u'Articles', u'http://www.asianreviewofbooks.com/new/rss.php')] feeds = [(u'Articles', u'http://www.asianreviewofbooks.com/new/rss.php')]
@ -48,4 +45,3 @@ class AsianReviewOfBooks(BasicNewsRecipe):
def preprocess_raw_html(self, raw, url):
    """Wrap the raw markup in a minimal HTML document.

    ``raw`` is placed inside ``<body>`` of a skeleton page whose title is
    the fixed text ``title``. ``url`` is accepted but not used.
    """
    opening = '<html><head><title>title</title></head><body>'
    closing = '</body></html>'
    return opening + raw + closing

View File

@ -1,13 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AstroNEWS(BasicNewsRecipe): class AstroNEWS(BasicNewsRecipe):
title = u'AstroNEWS' title = u'AstroNEWS'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa
category = 'astronomy, science' category = 'astronomy, science'
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
#extra_css= 'table {text-align: left;}'
no_stylesheets = True no_stylesheets = True
cover_url = 'http://news.astronet.pl/img/logo_news.jpg' cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
remove_attributes = ['width', 'align'] remove_attributes = ['width', 'align']

View File

@ -1,11 +1,12 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Astroflesz(BasicNewsRecipe): class Astroflesz(BasicNewsRecipe):
title = u'Astroflesz' title = u'Astroflesz'
oldest_article = 7 oldest_article = 7
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne' description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne' # noqa
category = 'astronomy' category = 'astronomy'
language = 'pl' language = 'pl'
cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png' cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
@ -17,7 +18,8 @@ class Astroflesz(BasicNewsRecipe):
remove_attributes = ['style'] remove_attributes = ['style']
keep_only_tags = [dict(id="k2Container")] keep_only_tags = [dict(id="k2Container")]
remove_tags_after = dict(name='div', attrs={'class': 'itemLinks'}) remove_tags_after = dict(name='div', attrs={'class': 'itemLinks'})
remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})] remove_tags = [dict(name='div', attrs={
'class': ['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')] feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
def postprocess_html(self, soup, first_fetch): def postprocess_html(self, soup, first_fetch):

View File

@ -6,6 +6,7 @@ www.athensnews.gr
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AthensNews(BasicNewsRecipe): class AthensNews(BasicNewsRecipe):
title = 'Athens News' title = 'Athens News'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -30,35 +31,31 @@ class AthensNews(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
} }
remove_tags = [ remove_tags = [
dict(name=['meta', 'link']) dict(name=['meta', 'link'])
] ]
keep_only_tags = [ keep_only_tags = [
dict(name='span',attrs={'class':'big'}) dict(name='span', attrs={'class': 'big'}), dict(
,dict(name='td', attrs={'class':['articlepubdate','text']}) name='td', attrs={'class': ['articlepubdate', 'text']})
] ]
remove_attributes = ['lang'] remove_attributes = ['lang']
feeds = [ feeds = [
(u'News' , u'http://www.athensnews.gr/category/1/feed' )
,(u'Politics' , u'http://www.athensnews.gr/category/8/feed' ) (u'News', u'http://www.athensnews.gr/category/1/feed'),
,(u'Business' , u'http://www.athensnews.gr/category/2/feed' ) (u'Politics', u'http://www.athensnews.gr/category/8/feed'),
,(u'Economy' , u'http://www.athensnews.gr/category/11/feed') (u'Business', u'http://www.athensnews.gr/category/2/feed'),
,(u'Community' , u'http://www.athensnews.gr/category/5/feed' ) (u'Economy', u'http://www.athensnews.gr/category/11/feed'),
,(u'Arts' , u'http://www.athensnews.gr/category/3/feed' ) (u'Community', u'http://www.athensnews.gr/category/5/feed'),
,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' ) (u'Arts', u'http://www.athensnews.gr/category/3/feed'),
,(u'Sports' , u'http://www.athensnews.gr/category/4/feed' ) (u'Living in Athens', u'http://www.athensnews.gr/category/7/feed'),
,(u'Travel' , u'http://www.athensnews.gr/category/6/feed' ) (u'Sports', u'http://www.athensnews.gr/category/4/feed'),
,(u'Letters' , u'http://www.athensnews.gr/category/44/feed') (u'Travel', u'http://www.athensnews.gr/category/6/feed'),
,(u'Media' , u'http://www.athensnews.gr/multimedia/feed' ) (u'Letters', u'http://www.athensnews.gr/category/44/feed'),
(u'Media', u'http://www.athensnews.gr/multimedia/feed')
] ]
def print_version(self, url): def print_version(self, url):

View File

@ -9,10 +9,12 @@ import html5lib
from lxml import html from lxml import html
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec for a space-separated list of class names.

    The returned dict has a single ``attrs`` entry whose ``class`` value is
    a predicate. Given a tag's class attribute string, the predicate yields
    the (truthy) set of names it shares with ``classes``, an empty set when
    nothing is shared, or the falsy input itself when the attribute is
    missing or empty.
    """
    wanted = frozenset(classes.split(' '))

    def has_wanted_class(value):
        # Short-circuit on None/'' so that .split() is never called on it.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': has_wanted_class}}
class TheAtlantic(BasicNewsRecipe): class TheAtlantic(BasicNewsRecipe):
title = 'The Atlantic' title = 'The Atlantic'
@ -23,7 +25,8 @@ class TheAtlantic(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
keep_only_tags = [ keep_only_tags = [
classes('article-header article-body article-magazine metadata article-cover-content lead-img'), classes(
'article-header article-body article-magazine metadata article-cover-content lead-img'),
] ]
remove_tags = [ remove_tags = [
{'name': ['meta', 'link', 'noscript']}, {'name': ['meta', 'link', 'noscript']},
@ -75,18 +78,22 @@ class TheAtlantic(BasicNewsRecipe):
url = a['href'] url = a['href']
if url.startswith('/'): if url.startswith('/'):
url = 'http://www.theatlantic.com' + url url = 'http://www.theatlantic.com' + url
li = a.findParent('li', attrs={'class':lambda x: x and 'article' in x.split()}) li = a.findParent(
'li', attrs={'class': lambda x: x and 'article' in x.split()})
desc = '' desc = ''
dek = li.find(attrs={'class':lambda x:x and 'dek' in x.split()}) dek = li.find(
attrs={'class': lambda x: x and 'dek' in x.split()})
if dek is not None: if dek is not None:
desc += self.tag_to_string(dek) desc += self.tag_to_string(dek)
byline = li.find(attrs={'class':lambda x:x and 'byline' in x.split()}) byline = li.find(
attrs={'class': lambda x: x and 'byline' in x.split()})
if byline is not None: if byline is not None:
desc += ' -- ' + self.tag_to_string(byline) desc += ' -- ' + self.tag_to_string(byline)
self.log('\t', title, 'at', url) self.log('\t', title, 'at', url)
if desc: if desc:
self.log('\t\t', desc) self.log('\t\t', desc)
current_articles.append({'title':title, 'url':url, 'description':desc}) current_articles.append(
{'title': title, 'url': url, 'description': desc})
if current_articles: if current_articles:
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds

View File

@ -3,6 +3,7 @@
from __future__ import unicode_literals, division, absolute_import, print_function from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1421956712(BasicNewsRecipe): class AdvancedUserRecipe1421956712(BasicNewsRecipe):
title = 'TheAtlantic.com' title = 'TheAtlantic.com'
__author__ = 'ebrandon' __author__ = 'ebrandon'

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AttacEspanaRecipe (BasicNewsRecipe): class AttacEspanaRecipe (BasicNewsRecipe):
__author__ = 'Marc Busqué <marc@lamarciana.com>' __author__ = 'Marc Busqué <marc@lamarciana.com>'
__url__ = 'http://www.lamarciana.com' __url__ = 'http://www.lamarciana.com'
@ -9,7 +10,7 @@ class AttacEspanaRecipe (BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>' __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
title = u'attac.es' title = u'attac.es'
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa
url = 'http://www.attac.es' url = 'http://www.attac.es'
language = 'es' language = 'es'
tags = 'contrainformación, información alternativa' tags = 'contrainformación, información alternativa'

View File

@ -11,13 +11,13 @@ http://www.corrieredellosport.it/
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Auto(BasicNewsRecipe): class Auto(BasicNewsRecipe):
__author__ = 'Gabriele Marini' __author__ = 'Gabriele Marini'
description = 'Auto and Formula 1' description = 'Auto and Formula 1'
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png' cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
title = u'Auto' title = u'Auto'
publisher = 'CONTE Editore' publisher = 'CONTE Editore'
category = 'Sport' category = 'Sport'
@ -34,13 +34,11 @@ class Auto(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables'
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \
description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [ keep_only_tags = [
dict(name='h2', attrs={'class': ['tit_Article y_Txt']}), dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
@ -51,13 +49,8 @@ class Auto(BasicNewsRecipe):
dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}), dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
dict(name='testoscheda')] dict(name='testoscheda')]
feeds = [ feeds = [
(u'Tutte le News', u'http://www.auto.it/rss/articoli.xml'), (u'Tutte le News', u'http://www.auto.it/rss/articoli.xml'),
(u'Prove su Strada', u'http://www.auto.it/rss/prove+6.xml'), (u'Prove su Strada', u'http://www.auto.it/rss/prove+6.xml'),
(u'Novit\xe0', u'http://www.auto.it/rss/novita+3.xml') (u'Novit\xe0', u'http://www.auto.it/rss/novita+3.xml')
] ]

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AutoBlog(BasicNewsRecipe): class AutoBlog(BasicNewsRecipe):
title = u'Auto Blog' title = u'Auto Blog'
__author__ = 'Welovelucy' __author__ = 'Welovelucy'
@ -12,5 +13,3 @@ class AutoBlog(BasicNewsRecipe):
def print_version(self, url):
    """Return the print-page URL, formed by appending ``print/`` to ``url``."""
    suffix = 'print/'
    return url + suffix

View File

@ -11,13 +11,13 @@ http://www.corrieredellosport.it/
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AutoPR(BasicNewsRecipe): class AutoPR(BasicNewsRecipe):
__author__ = 'Gabriele Marini' __author__ = 'Gabriele Marini'
description = 'Auto and Formula 1' description = 'Auto and Formula 1'
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png' cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
title = u'Auto Prove' title = u'Auto Prove'
publisher = 'CONTE Editore' publisher = 'CONTE Editore'
category = 'Sport' category = 'Sport'
@ -40,8 +40,6 @@ class AutoPR(BasicNewsRecipe):
# , '--ignore-tables' # , '--ignore-tables'
# ] # ]
#html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [ keep_only_tags = [
dict(name='h2', attrs={'class': ['tit_Article y_Txt']}), dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
dict(name='h2', attrs={'class': ['tit_Article']}), dict(name='h2', attrs={'class': ['tit_Article']}),
@ -74,17 +72,19 @@ class AutoPR(BasicNewsRecipe):
def create_links_append(self, link, date, description): def create_links_append(self, link, date, description):
current_articles = [] current_articles = []
current_articles.append({'title': 'Generale', 'url': link,'description':description, 'date':date}), current_articles.append(
current_articles.append({'title': 'Design', 'url': link.replace('scheda','design'),'description':'scheda', 'date':''}), {'title': 'Generale', 'url': link, 'description': description, 'date': date}),
current_articles.append({'title': 'Interni', 'url': link.replace('scheda','interni'),'description':'Interni', 'date':''}), current_articles.append({'title': 'Design', 'url': link.replace(
current_articles.append({'title': 'Tecnica', 'url': link.replace('scheda','tecnica'),'description':'Tecnica', 'date':''}), 'scheda', 'design'), 'description': 'scheda', 'date': ''}),
current_articles.append({'title': 'Su Strada', 'url': link.replace('scheda','su_strada'),'description':'Su Strada', 'date':''}), current_articles.append({'title': 'Interni', 'url': link.replace(
current_articles.append({'title': 'Pagella', 'url': link.replace('scheda','pagella'),'description':'Pagella', 'date':''}), 'scheda', 'interni'), 'description': 'Interni', 'date': ''}),
current_articles.append({'title': 'Rilevamenti', 'url': link.replace('scheda','telemetria'),'description':'Rilevamenti', 'date':''}) current_articles.append({'title': 'Tecnica', 'url': link.replace(
'scheda', 'tecnica'), 'description': 'Tecnica', 'date': ''}),
current_articles.append({'title': 'Su Strada', 'url': link.replace(
'scheda', 'su_strada'), 'description': 'Su Strada', 'date': ''}),
current_articles.append({'title': 'Pagella', 'url': link.replace(
'scheda', 'pagella'), 'description': 'Pagella', 'date': ''}),
current_articles.append({'title': 'Rilevamenti', 'url': link.replace(
'scheda', 'telemetria'), 'description': 'Rilevamenti', 'date': ''})
return current_articles return current_articles

View File

@ -9,6 +9,7 @@ auto-bild.ro
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AutoBild(BasicNewsRecipe): class AutoBild(BasicNewsRecipe):
title = u'Auto Bild' title = u'Auto Bild'
__author__ = u'Silviu Cotoar\u0103' __author__ = u'Silviu Cotoar\u0103'
@ -24,22 +25,16 @@ class AutoBild(BasicNewsRecipe):
cover_url = 'http://www.auto-bild.ro/images/autobild.gif' cover_url = 'http://www.auto-bild.ro/images/autobild.gif'
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class': 'box_2 articol clearfix'}) dict(name='div', attrs={'class': 'box_2 articol clearfix'})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':['detail']}) dict(name='div', attrs={'class': ['detail']}), dict(name='a', attrs={'id': ['zoom_link']}), dict(
, dict(name='a', attrs={'id':['zoom_link']}) name='div', attrs={'class': ['icons clearfix']}), dict(name='div', attrs={'class': ['pub_articol clearfix']})
, dict(name='div', attrs={'class':['icons clearfix']})
, dict(name='div', attrs={'class':['pub_articol clearfix']})
] ]

View File

@ -1,6 +1,7 @@
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class autogids(BasicNewsRecipe): class autogids(BasicNewsRecipe):
title = u'Automatiseringgids IT' title = u'Automatiseringgids IT'
oldest_article = 7 oldest_article = 7

View File

@ -9,11 +9,12 @@ www.autosport.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class autosport(BasicNewsRecipe): class autosport(BasicNewsRecipe):
title = u'Autosport' title = u'Autosport'
__author__ = 'MrStefan <mrstefaan@gmail.com>' __author__ = 'MrStefan <mrstefaan@gmail.com>'
language = 'en_GB' language = 'en_GB'
description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...' description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...' # noqa
masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif' masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'
remove_empty_feeds = True remove_empty_feeds = True
oldest_article = 1 oldest_article = 1
@ -23,8 +24,10 @@ class autosport(BasicNewsRecipe):
keep_only_tags = [] keep_only_tags = []
keep_only_tags.append(dict(name='h1', attrs={'class': 'news_headline'})) keep_only_tags.append(dict(name='h1', attrs={'class': 'news_headline'}))
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'})) keep_only_tags.append(
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'})) dict(name='td', attrs={'class': 'news_article_author'}))
keep_only_tags.append(
dict(name='td', attrs={'class': 'news_article_date'}))
keep_only_tags.append(dict(name='p')) keep_only_tags.append(dict(name='p'))
feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')] feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]

View File

@ -9,6 +9,7 @@ avantaje.ro
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Avantaje(BasicNewsRecipe): class Avantaje(BasicNewsRecipe):
title = u'Avantaje' title = u'Avantaje'
__author__ = u'Silviu Cotoar\u0103' __author__ = u'Silviu Cotoar\u0103'
@ -24,25 +25,16 @@ class Avantaje(BasicNewsRecipe):
cover_url = 'http://www.avantaje.ro/images/default/logo.gif' cover_url = 'http://www.avantaje.ro/images/default/logo.gif'
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':'articol'}) dict(name='div', attrs={'id': 'articol'}), dict(name='div', attrs={
, dict(name='div', attrs={'class':'gallery clearfix'}) 'class': 'gallery clearfix'}), dict(name='div', attrs={'align': 'justify'})
, dict(name='div', attrs={'align':'justify'})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':['color_sanatate_box']}) dict(name='div', attrs={'id': ['color_sanatate_box']}), dict(name='div', attrs={'class': ['nav']}), dict(name='div', attrs={'class': ['voteaza_art']}), dict(name='div', attrs={'class': ['bookmark']}), dict(name='div', attrs={'class': ['links clearfix']}), dict(name='div', attrs={'class': ['title']}) # noqa
, dict(name='div', attrs={'class':['nav']})
, dict(name='div', attrs={'class':['voteaza_art']})
, dict(name='div', attrs={'class':['bookmark']})
, dict(name='div', attrs={'class':['links clearfix']})
, dict(name='div', attrs={'class':['title']})
] ]
remove_tags_after = [ remove_tags_after = [

View File

@ -9,6 +9,7 @@ aventurilapescuit.ro
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AventuriLaPescuit(BasicNewsRecipe): class AventuriLaPescuit(BasicNewsRecipe):
title = u'Aventuri La Pescuit' title = u'Aventuri La Pescuit'
__author__ = u'Silviu Cotoar\u0103' __author__ = u'Silviu Cotoar\u0103'
@ -24,10 +25,7 @@ class AventuriLaPescuit(BasicNewsRecipe):
cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif' cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ keep_only_tags = [
@ -35,8 +33,8 @@ class AventuriLaPescuit(BasicNewsRecipe):
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':['right option']}) dict(name='div', attrs={'class': ['right option']}), dict(
, dict(name='iframe', attrs={'scrolling':['no']}) name='iframe', attrs={'scrolling': ['no']})
] ]
remove_tags_after = [ remove_tags_after = [

View File

@ -4,6 +4,8 @@ __copyright__ = '2010, BlonG'
avto-magazin.si avto-magazin.si
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Dnevnik(BasicNewsRecipe): class Dnevnik(BasicNewsRecipe):
title = u'Avto Magazin' title = u'Avto Magazin'
__author__ = u'BlonG' __author__ = u'BlonG'
@ -17,7 +19,6 @@ class Dnevnik(BasicNewsRecipe):
conversion_options = {'linearize_tables': True} conversion_options = {'linearize_tables': True}
cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg' cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'
extra_css = ''' extra_css = '''
@ -41,7 +42,6 @@ class Dnevnik(BasicNewsRecipe):
dict(name='div', attrs={'id': 'footer'}), dict(name='div', attrs={'id': 'footer'}),
] ]
feeds = [ feeds = [
(u'Novice', u'http://www.avto-magazin.si/rss/') (u'Novice', u'http://www.avto-magazin.si/rss/')
] ]

View File

@ -6,6 +6,7 @@ axxon.com.ar
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Axxon_news(BasicNewsRecipe): class Axxon_news(BasicNewsRecipe):
title = 'Revista Axxon' title = 'Revista Axxon'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -21,19 +22,16 @@ class Axxon_news(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
publication_type = 'magazine' publication_type = 'magazine'
INDEX = 'http://axxon.com.ar/rev/' INDEX = 'http://axxon.com.ar/rev/'
extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} ' extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} ' # noqa
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
keep_only_tags = [dict(name='div', attrs={'class': 'post'})] keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
remove_tags = [dict(name=['object', 'link', 'iframe', 'embed', 'img'])] remove_tags = [dict(name=['object', 'link', 'iframe', 'embed', 'img'])]
remove_tags_after = [dict(attrs={'class':['editorial','correo','biografia','articulo']})] remove_tags_after = [
dict(attrs={'class': ['editorial', 'correo', 'biografia', 'articulo']})]
remove_attributes = ['width', 'height', 'font', 'border', 'align'] remove_attributes = ['width', 'height', 'font', 'border', 'align']
def parse_index(self): def parse_index(self):
@ -44,21 +42,16 @@ class Axxon_news(BasicNewsRecipe):
description = '' description = ''
title_prefix = '' title_prefix = ''
feed_link = item.find('a') feed_link = item.find('a')
if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='): if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='): # noqa
url = self.INDEX + feed_link['href'] url = self.INDEX + feed_link['href']
title = title_prefix + self.tag_to_string(feed_link) title = title_prefix + self.tag_to_string(feed_link)
date = strftime(self.timefmt) date = strftime(self.timefmt)
articles.append({ articles.append({
'title' :title 'title': title, 'date': date, 'url': url, 'description': description
,'date' :date
,'url' :url
,'description':description
}) })
return [(soup.head.title.string, articles)] return [(soup.head.title.string, articles)]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -8,6 +8,7 @@ axxon.com.ar
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import Tag
class Axxon_news(BasicNewsRecipe): class Axxon_news(BasicNewsRecipe):
title = 'Axxon noticias' title = 'Axxon noticias'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -23,14 +24,9 @@ class Axxon_news(BasicNewsRecipe):
lang = 'es-AR' lang = 'es-AR'
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
, 'pretty_print' : True
} }
keep_only_tags = [dict(name='div', attrs={'class': 'post'})] keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
remove_tags = [dict(name=['object', 'link', 'iframe', 'embed'])] remove_tags = [dict(name=['object', 'link', 'iframe', 'embed'])]
@ -39,11 +35,10 @@ class Axxon_news(BasicNewsRecipe):
remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align'] remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align']
def adeify_images2(cls, soup): def adeify_images2(cls, soup):
for item in soup.findAll('img'): for item in soup.findAll('img'):
for attrib in ['height', 'width', 'border', 'align', 'style']: for attrib in ['height', 'width', 'border', 'align', 'style']:
if item.has_key(attrib): if item.has_key(attrib): # noqa
del item[attrib] del item[attrib]
oldParent = item.parent oldParent = item.parent
if oldParent.name == 'a': if oldParent.name == 'a':
@ -56,7 +51,7 @@ class Axxon_news(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) mlang = Tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
soup.html.insert(0, mlang) soup.html.insert(0, mlang)
return self.adeify_images2(soup) return self.adeify_images2(soup)

View File

@ -7,6 +7,7 @@ azstarnet.com
import urllib import urllib
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Azstarnet(BasicNewsRecipe): class Azstarnet(BasicNewsRecipe):
title = 'Arizona Daily Star' title = 'Arizona Daily Star'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -23,36 +24,29 @@ class Azstarnet(BasicNewsRecipe):
needs_subscription = True needs_subscription = True
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
br.open('http://azstarnet.com/') br.open('http://azstarnet.com/')
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'm':'login' data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
,'u':self.username
,'p':self.password
,'z':'http://azstarnet.com/'
}) })
br.open('http://azstarnet.com/app/registration/proxy.php', data) br.open('http://azstarnet.com/app/registration/proxy.php', data)
return br return br
remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])] remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])]
feeds = [ feeds = [
(u'Local News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc') (u'Local News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc'),
,(u'World News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc') (u'National News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc'),
,(u'Sports' , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc') (u'World News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc'),
,(u'Opinion' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc') (u'Sports', u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc'),
,(u'Movies' , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc') (u'Opinion', u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc'),
,(u'Food' , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc') (u'Movies', u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc'),
(u'Food', u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -62,4 +56,3 @@ class Azstarnet(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url + '?print=1' return url + '?print=1'

View File

@ -8,6 +8,7 @@ b365.realitatea.net
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class b365Realitatea(BasicNewsRecipe): class b365Realitatea(BasicNewsRecipe):
title = u'b365 Realitatea' title = u'b365 Realitatea'
__author__ = u'Silviu Cotoar\u0103' __author__ = u'Silviu Cotoar\u0103'
@ -23,10 +24,7 @@ class b365Realitatea(BasicNewsRecipe):
cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png' cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'
conversion_options = { conversion_options = {
'comments' : description 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
,'tags' : category
,'language' : language
,'publisher' : publisher
} }
keep_only_tags = [ keep_only_tags = [
@ -34,10 +32,8 @@ class b365Realitatea(BasicNewsRecipe):
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':'date'}) dict(name='div', attrs={'class': 'date'}), dict(name='dic', attrs={'class': 'addthis_toolbox addthis_default_style'}), dict(
, dict(name='dic', attrs={'class':'addthis_toolbox addthis_default_style'}) name='div', attrs={'class': 'related_posts'}), dict(name='div', attrs={'id': 'RelevantiWidget'})
, dict(name='div', attrs={'class':'related_posts'})
, dict(name='div', attrs={'id':'RelevantiWidget'})
] ]
remove_tags_after = [ remove_tags_after = [
@ -49,4 +45,3 @@ class b365Realitatea(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -7,6 +7,7 @@ b92.net
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class B92(BasicNewsRecipe): class B92(BasicNewsRecipe):
title = 'B92' title = 'B92'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -30,33 +31,32 @@ class B92(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
, 'tags' : category
, 'publisher': publisher
, 'language' : language
, 'linearize_tables' : True
} }
preprocess_regexps = [ preprocess_regexps = [
(re.compile(u'\u0110'), lambda match: u'\u00D0'), (re.compile(u'\u0110'), lambda match: u'\u00D0'),
(re.compile(r'<html.*?<body>', re.DOTALL|re.IGNORECASE), lambda match: '<html><head><title>something</title></head><body>') (re.compile(r'<html.*?<body>', re.DOTALL | re.IGNORECASE),
lambda match: '<html><head><title>something</title></head><body>')
] ]
keep_only_tags = [dict(attrs={'class': ['article-info1', 'article-text']})] keep_only_tags = [dict(attrs={'class': ['article-info1', 'article-text']})]
remove_attributes = ['width','height','align','hspace','vspace','border','lang','xmlns:fb'] remove_attributes = ['width', 'height', 'align',
'hspace', 'vspace', 'border', 'lang', 'xmlns:fb']
remove_tags = [ remove_tags = [
dict(name=['embed','link','base','meta','iframe']) dict(name=['embed', 'link', 'base', 'meta', 'iframe']), dict(
,dict(attrs={'id':'social'}) attrs={'id': 'social'})
] ]
feeds = [ feeds = [
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' ) (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml'),
,(u'Sport' , u'http://www.b92.net/info/rss/sport.xml' ) (u'Biz', u'http://www.b92.net/info/rss/biz.xml'),
,(u'Zivot' , u'http://www.b92.net/info/rss/zivot.xml' ) (u'Sport', u'http://www.b92.net/info/rss/sport.xml'),
,(u'Kultura' , u'http://www.b92.net/info/rss/kultura.xml' ) (u'Zivot', u'http://www.b92.net/info/rss/zivot.xml'),
,(u'Automobili' , u'http://www.b92.net/info/rss/automobili.xml') (u'Kultura', u'http://www.b92.net/info/rss/kultura.xml'),
,(u'Tehnopolis' , u'http://www.b92.net/info/rss/tehnopolis.xml') (u'Automobili', u'http://www.b92.net/info/rss/automobili.xml'),
(u'Tehnopolis', u'http://www.b92.net/info/rss/tehnopolis.xml')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -7,6 +7,7 @@ www.buenosairesherald.com
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class BuenosAiresHerald(BasicNewsRecipe): class BuenosAiresHerald(BasicNewsRecipe):
title = 'Buenos Aires Herald' title = 'Buenos Aires Herald'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -31,22 +32,19 @@ class BuenosAiresHerald(BasicNewsRecipe):
""" """
conversion_options = { conversion_options = {
'comment' : description 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
} }
remove_tags = [dict(name=['meta', 'link', 'iframe'])] remove_tags = [dict(name=['meta', 'link', 'iframe'])]
keep_only_tags = [dict(attrs={'class': 'nota_texto p'})] keep_only_tags = [dict(attrs={'class': 'nota_texto p'})]
feeds = [ feeds = [
(u'Argentina' , u'http://www.buenosairesherald.com/argentina' )
,(u'World' , u'http://www.buenosairesherald.com/world' ) (u'Argentina', u'http://www.buenosairesherald.com/argentina'),
,(u'Latin America' , u'http://www.buenosairesherald.com/latin-america' ) (u'World', u'http://www.buenosairesherald.com/world'),
,(u'Entertainment' , u'http://www.buenosairesherald.com/entertainment' ) (u'Latin America', u'http://www.buenosairesherald.com/latin-america'),
,(u'Sports' , u'http://www.buenosairesherald.com/sports' ) (u'Entertainment', u'http://www.buenosairesherald.com/entertainment'),
(u'Sports', u'http://www.buenosairesherald.com/sports')
] ]
def print_version(self, url): def print_version(self, url):
@ -54,27 +52,24 @@ class BuenosAiresHerald(BasicNewsRecipe):
artid = artidraw.partition('/')[0] artid = artidraw.partition('/')[0]
return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid
def parse_index(self): def parse_index(self):
totalfeeds = [] totalfeeds = []
lfeeds = self.get_feeds() lfeeds = self.get_feeds()
for feedobj in lfeeds: for feedobj in lfeeds:
feedtitle, feedurl = feedobj feedtitle, feedurl = feedobj
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) self.report_progress(0, ('Fetching feed') + ' %s...' %
(feedtitle if feedtitle else feedurl))
articles = [] articles = []
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class': 'nota_texto_seccion'}): for item in soup.findAll('div', attrs={'class': 'nota_texto_seccion'}):
description = self.tag_to_string(item.h2) description = self.tag_to_string(item.h2)
atag = item.h2.find('a') atag = item.h2.find('a')
if atag and atag.has_key('href'): if atag and atag.has_key('href'): # noqa
url = self.INDEX + atag['href'] url = self.INDEX + atag['href']
title = description title = description
date = strftime(self.timefmt) date = strftime(self.timefmt)
articles.append({ articles.append({
'title' :title 'title': title, 'date': date, 'url': url, 'description': description
,'date' :date
,'url' :url
,'description':description
}) })
totalfeeds.append((feedtitle, articles)) totalfeeds.append((feedtitle, articles))
return totalfeeds return totalfeeds

Some files were not shown because too many files have changed in this diff Show More