mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
This commit is contained in:
parent
643977ffa6
commit
567040ee1e
@ -1,13 +1,13 @@
|
||||
#!/usr/bin/env python2
|
||||
##
|
||||
## Title: Diario 10minutos.com.uy News and Sports Calibre Recipe
|
||||
## Contact: Carlos Alves - <carlos@carlosalves.info>
|
||||
# Title: Diario 10minutos.com.uy News and Sports Calibre Recipe
|
||||
# Contact: Carlos Alves - <carlos@carlosalves.info>
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
## Copyright: Carlos Alves - <carlos@carlosalves.info>
|
||||
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
# Copyright: Carlos Alves - <carlos@carlosalves.info>
|
||||
##
|
||||
## Written: September 2013
|
||||
## Last Edited: 2016-01-11
|
||||
# Written: September 2013
|
||||
# Last Edited: 2016-01-11
|
||||
##
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -18,6 +18,7 @@ __author__ = '2016, Carlos Alves <carlos@carlosalves.info>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class General(BasicNewsRecipe):
|
||||
title = '10minutos'
|
||||
__author__ = 'Carlos Alves'
|
||||
@ -33,13 +34,13 @@ class General(BasicNewsRecipe):
|
||||
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'post-content'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'post-content'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
|
||||
dict(name='div', attrs={'class':'sharedaddy sd-sharing-enabled'}),
|
||||
dict(name='p', attrs={'class':'post-meta'}),
|
||||
dict(name=['object','link'])
|
||||
dict(name='div', attrs={'class': ['hr', 'titlebar', 'navigation']}),
|
||||
dict(name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'}),
|
||||
dict(name='p', attrs={'class': 'post-meta'}),
|
||||
dict(name=['object', 'link'])
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python2
|
||||
##
|
||||
## Last Edited: 2016-01-11 Carlos Alves <carlos@carlosalves.info>
|
||||
# Last Edited: 2016-01-11 Carlos Alves <carlos@carlosalves.info>
|
||||
##
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -11,6 +11,7 @@ __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Noticias(BasicNewsRecipe):
|
||||
title = '180.com.uy'
|
||||
__author__ = 'Gustavo Azambuja'
|
||||
@ -27,15 +28,15 @@ class Noticias(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
remove_tags_after = dict(name='article')
|
||||
keep_only_tags = [
|
||||
dict(name='h3', attrs={'class':'title'}),
|
||||
dict(name='div', attrs={'class':'copete'}),
|
||||
dict(name='article', attrs={'class':'texto'})
|
||||
dict(name='h3', attrs={'class': 'title'}),
|
||||
dict(name='div', attrs={'class': 'copete'}),
|
||||
dict(name='article', attrs={'class': 'texto'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['object','link'])
|
||||
dict(name=['object', 'link'])
|
||||
]
|
||||
|
||||
remove_attributes = ['width','height', 'style', 'font', 'color']
|
||||
remove_attributes = ['width', 'height', 'style', 'font', 'color']
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
||||
@ -50,9 +51,7 @@ class Noticias(BasicNewsRecipe):
|
||||
def get_cover_url(self):
|
||||
pass
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
|
@ -22,7 +22,7 @@ class E1843(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'title'}),
|
||||
dict(name='h1', attrs={'class': 'title'}),
|
||||
classes('field-name-field-rubric-summary article-header__overlay-main-image meta-info__author article__body'),
|
||||
]
|
||||
|
||||
@ -54,7 +54,8 @@ class E1843(BasicNewsRecipe):
|
||||
r = div.find(**classes('article-rubric'))
|
||||
if r is not None:
|
||||
desc = self.tag_to_string(r)
|
||||
articles.append({'title':title, 'url':url, 'description':desc})
|
||||
articles.append(
|
||||
{'title': title, 'url': url, 'description': desc})
|
||||
|
||||
if current_section and articles:
|
||||
ans.append((current_section, articles))
|
||||
|
@ -10,6 +10,7 @@ www.20minutos.es
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||
|
||||
title = u'20 Minutos new'
|
||||
@ -32,23 +33,15 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['content','vinetas',]})
|
||||
,dict(name='div', attrs={'class':['boxed','description','lead','article-content','cuerpo estirar']})
|
||||
,dict(name='span', attrs={'class':['photo-bar']})
|
||||
,dict(name='ul', attrs={'class':['article-author']})
|
||||
dict(name='div', attrs={'id': ['content', 'vinetas', ]}), dict(name='div', attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']}), dict(name='span', attrs={'class': ['photo-bar']}), dict(name='ul', attrs={'class': ['article-author']}) # noqa
|
||||
]
|
||||
|
||||
remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
|
||||
remove_tags_after = dict(name='div' , attrs={'class':['related-news','col']})
|
||||
remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
|
||||
remove_tags_after = dict(
|
||||
name='div', attrs={'class': ['related-news', 'col']})
|
||||
|
||||
remove_tags = [
|
||||
dict(name='ol', attrs={'class':['navigation',]})
|
||||
,dict(name='span', attrs={'class':['action']})
|
||||
,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
|
||||
,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
|
||||
,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
|
||||
,dict(name='ul', attrs={'id':['site-links']})
|
||||
,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
|
||||
dict(name='ol', attrs={'class': ['navigation', ]}), dict(name='span', attrs={'class': ['action']}), dict(name='div', attrs={'class': ['twitter comments-list hidden', 'related-news', 'col', 'photo-gallery', 'photo-gallery side-art-block', 'calendario', 'article-comment', 'postto estirar', 'otras_vinetas estirar', 'kment', 'user-actions']}), dict( name='div', attrs={'id': ['twitter-destacados', 'eco-tabs', 'inner', 'vineta_calendario', 'vinetistas clearfix', 'otras_vinetas estirar', 'MIN1', 'main', 'SUP1', 'INT']}), dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}), dict(name='ul', attrs={'id': ['site-links']}), dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}) # noqa
|
||||
]
|
||||
|
||||
extra_css = """
|
||||
@ -57,23 +50,25 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
|
||||
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
|
||||
"""
|
||||
|
||||
preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
|
||||
|
||||
feeds = [
|
||||
(u'Portada' , u'http://www.20minutos.es/rss/')
|
||||
,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/')
|
||||
,(u'Internacional' , u'http://www.20minutos.es/rss/internacional/')
|
||||
,(u'Economia' , u'http://www.20minutos.es/rss/economia/')
|
||||
,(u'Deportes' , u'http://www.20minutos.es/rss/deportes/')
|
||||
,(u'Tecnologia' , u'http://www.20minutos.es/rss/tecnologia/')
|
||||
,(u'Gente - TV' , u'http://www.20minutos.es/rss/gente-television/')
|
||||
,(u'Motor' , u'http://www.20minutos.es/rss/motor/')
|
||||
,(u'Salud' , u'http://www.20minutos.es/rss/belleza-y-salud/')
|
||||
,(u'Viajes' , u'http://www.20minutos.es/rss/viajes/')
|
||||
,(u'Vivienda' , u'http://www.20minutos.es/rss/vivienda/')
|
||||
,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/')
|
||||
,(u'Cine' , u'http://www.20minutos.es/rss/cine/')
|
||||
,(u'Musica' , u'http://www.20minutos.es/rss/musica/')
|
||||
,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/')
|
||||
,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/')
|
||||
|
||||
(u'Portada', u'http://www.20minutos.es/rss/'),
|
||||
(u'Nacional', u'http://www.20minutos.es/rss/nacional/'),
|
||||
(u'Internacional', u'http://www.20minutos.es/rss/internacional/'),
|
||||
(u'Economia', u'http://www.20minutos.es/rss/economia/'),
|
||||
(u'Deportes', u'http://www.20minutos.es/rss/deportes/'),
|
||||
(u'Tecnologia', u'http://www.20minutos.es/rss/tecnologia/'),
|
||||
(u'Gente - TV', u'http://www.20minutos.es/rss/gente-television/'),
|
||||
(u'Motor', u'http://www.20minutos.es/rss/motor/'),
|
||||
(u'Salud', u'http://www.20minutos.es/rss/belleza-y-salud/'),
|
||||
(u'Viajes', u'http://www.20minutos.es/rss/viajes/'),
|
||||
(u'Vivienda', u'http://www.20minutos.es/rss/vivienda/'),
|
||||
(u'Empleo', u'http://www.20minutos.es/rss/empleo/'),
|
||||
(u'Cine', u'http://www.20minutos.es/rss/cine/'),
|
||||
(u'Musica', u'http://www.20minutos.es/rss/musica/'),
|
||||
(u'Vinetas', u'http://www.20minutos.es/rss/vinetas/'),
|
||||
(u'Comunidad20', u'http://www.20minutos.es/rss/zona20/')
|
||||
]
|
||||
|
@ -6,6 +6,7 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Minutes(BasicNewsRecipe):
|
||||
|
||||
title = '20 minutes'
|
||||
@ -24,10 +25,10 @@ class Minutes(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(attrs={'class':lambda x: x and 'lt-content' in x.split()}),
|
||||
dict(attrs={'class': lambda x: x and 'lt-content' in x.split()}),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(attrs={'class':lambda x:x and 'content-related' in x.split()}),
|
||||
dict(attrs={'class': lambda x: x and 'content-related' in x.split()}),
|
||||
]
|
||||
remove_tags_after = dict(id='ob_holder')
|
||||
|
||||
|
@ -6,10 +6,11 @@ www.20minutos.es
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class t20Minutos(BasicNewsRecipe):
|
||||
title = '20 Minutos'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas'
|
||||
description = 'Diario de informacion general y local mas leido de Espania, noticias de ultima hora de Espania, el mundo, local, deportes, noticias curiosas y mas' # noqa
|
||||
publisher = '20 Minutos Online SL'
|
||||
category = 'news, politics, Spain'
|
||||
oldest_article = 2
|
||||
@ -27,23 +28,21 @@ class t20Minutos(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
remove_tags = [dict(attrs={'class':'mf-viral'})]
|
||||
remove_attributes=['border']
|
||||
remove_tags = [dict(attrs={'class': 'mf-viral'})]
|
||||
remove_attributes = ['border']
|
||||
|
||||
feeds = [
|
||||
(u'Principal' , u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
|
||||
,(u'Cine' , u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss')
|
||||
,(u'Internacional' , u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss')
|
||||
,(u'Deportes' , u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
|
||||
,(u'Nacional' , u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss')
|
||||
,(u'Economia' , u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss')
|
||||
,(u'Tecnologia' , u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
|
||||
|
||||
(u'Principal', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
|
||||
(u'Cine', u'http://20minutos.feedsportal.com/c/32489/f/478285/index.rss'),
|
||||
(u'Internacional', u'http://20minutos.feedsportal.com/c/32489/f/492689/index.rss'),
|
||||
(u'Deportes', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
|
||||
(u'Nacional', u'http://20minutos.feedsportal.com/c/32489/f/492688/index.rss'),
|
||||
(u'Economia', u'http://20minutos.feedsportal.com/c/32489/f/492690/index.rss'),
|
||||
(u'Tecnologia', u'http://20minutos.feedsportal.com/c/32489/f/478292/index.rss')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -62,7 +61,6 @@ class t20Minutos(BasicNewsRecipe):
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
if not item.has_key('alt'): # noqa
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
|
@ -11,6 +11,7 @@ import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
class Cro24Sata(BasicNewsRecipe):
|
||||
title = '24 Sata - Hr'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -27,35 +28,33 @@ class Cro24Sata(BasicNewsRecipe):
|
||||
|
||||
lang = 'hr-HR'
|
||||
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' # noqa
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','embed'])
|
||||
,dict(name='table', attrs={'class':'enumbox'})
|
||||
dict(name=['object', 'link', 'embed']), dict(
|
||||
name='table', attrs={'class': 'enumbox'})
|
||||
]
|
||||
|
||||
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
||||
feeds = [(u'Najnovije Vijesti',
|
||||
u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
mlang = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '&action=ispis'
|
||||
|
||||
|
@ -10,6 +10,7 @@ __copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Ser24Sata(BasicNewsRecipe):
|
||||
title = '24 Sata - Sr'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -29,20 +30,17 @@ class Ser24Sata(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
feeds = [
|
||||
(u'Vesti' , u'http://www.24sata.rs/rss/vesti.xml' ),
|
||||
(u'Sport' , u'http://www.24sata.rs/rss/sport.xml' ),
|
||||
(u'Šou' , u'http://www.24sata.rs/rss/sou.xml' ),
|
||||
(u'Vesti', u'http://www.24sata.rs/rss/vesti.xml'),
|
||||
(u'Sport', u'http://www.24sata.rs/rss/sport.xml'),
|
||||
(u'Šou', u'http://www.24sata.rs/rss/sou.xml'),
|
||||
(u'Specijal', u'http://www.24sata.rs/rss/specijal.xml'),
|
||||
(u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml' )
|
||||
(u'Novi Sad', u'http://www.24sata.rs/rss/ns.xml')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -3,6 +3,7 @@
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1438446837(BasicNewsRecipe):
|
||||
title = '3DNews: Daily Digital Digest'
|
||||
__author__ = 'bugmen00t'
|
||||
@ -17,30 +18,48 @@ class AdvancedUserRecipe1438446837(BasicNewsRecipe):
|
||||
max_articles_per_feed = 60
|
||||
|
||||
feeds = [
|
||||
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware', 'http://www.3dnews.ru/news/rss/'),
|
||||
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software', 'http://www.3dnews.ru/software-news/rss/'),
|
||||
('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438', 'http://www.3dnews.ru/smart-things/rss/'),
|
||||
('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430', 'http://www.3dnews.ru/editorial/rss/'),
|
||||
('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c', 'http://www.3dnews.ru/cpu/rss/'),
|
||||
('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b', 'http://www.3dnews.ru/motherboard/rss/'),
|
||||
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware',
|
||||
'http://www.3dnews.ru/news/rss/'),
|
||||
('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software',
|
||||
'http://www.3dnews.ru/software-news/rss/'),
|
||||
('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438',
|
||||
'http://www.3dnews.ru/smart-things/rss/'),
|
||||
('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430',
|
||||
'http://www.3dnews.ru/editorial/rss/'),
|
||||
('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c',
|
||||
'http://www.3dnews.ru/cpu/rss/'),
|
||||
('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b',
|
||||
'http://www.3dnews.ru/motherboard/rss/'),
|
||||
('\u041a\u043e\u0440\u043f\u0443\u0441\u0430, \u0411\u041f \u0438 \u043e\u0445\u043b\u0430\u0436\u0434\u0435\u043d\u0438\u0435',
|
||||
'http://www.3dnews.ru/cooling/rss/'),
|
||||
('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b', 'http://www.3dnews.ru/video/rss/'),
|
||||
('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b', 'http://www.3dnews.ru/display/rss/'),
|
||||
('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438', 'http://www.3dnews.ru/storage/rss/'),
|
||||
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c', 'http://www.3dnews.ru/auto/rss/'),
|
||||
('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c', 'http://www.3dnews.ru/phone/rss/'),
|
||||
('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f', 'http://www.3dnews.ru/peripheral/rss/'),
|
||||
('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a', 'http://www.3dnews.ru/mobile/rss/'),
|
||||
('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b', 'http://www.3dnews.ru/tablets/rss/'),
|
||||
('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430', 'http://www.3dnews.ru/multimedia/rss/'),
|
||||
('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b',
|
||||
'http://www.3dnews.ru/video/rss/'),
|
||||
('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b',
|
||||
'http://www.3dnews.ru/display/rss/'),
|
||||
('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438',
|
||||
'http://www.3dnews.ru/storage/rss/'),
|
||||
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c',
|
||||
'http://www.3dnews.ru/auto/rss/'),
|
||||
('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c',
|
||||
'http://www.3dnews.ru/phone/rss/'),
|
||||
('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f',
|
||||
'http://www.3dnews.ru/peripheral/rss/'),
|
||||
('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a',
|
||||
'http://www.3dnews.ru/mobile/rss/'),
|
||||
('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b',
|
||||
'http://www.3dnews.ru/tablets/rss/'),
|
||||
('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430',
|
||||
'http://www.3dnews.ru/multimedia/rss/'),
|
||||
('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0435 \u0444\u043e\u0442\u043e \u0438 \u0432\u0438\u0434\u0435\u043e',
|
||||
'http://www.3dnews.ru/digital/rss/'),
|
||||
('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438', 'http://www.3dnews.ru/communication/rss/'),
|
||||
('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438',
|
||||
'http://www.3dnews.ru/communication/rss/'),
|
||||
('\u0418\u0433\u0440\u044b', 'http://www.3dnews.ru/games/rss/'),
|
||||
('\u041f\u0440\u043e\u0433\u0440\u0430\u043c\u043c\u043d\u043e\u0435 \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0435\u043d\u0438\u0435',
|
||||
'http://www.3dnews.ru/software/rss/'),
|
||||
('Off-\u0441\u044f\u043d\u043a\u0430', 'http://www.3dnews.ru/offsyanka/rss/'),
|
||||
('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f', 'http://www.3dnews.ru/workshop/rss/'),
|
||||
('Off-\u0441\u044f\u043d\u043a\u0430',
|
||||
'http://www.3dnews.ru/offsyanka/rss/'),
|
||||
('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f',
|
||||
'http://www.3dnews.ru/workshop/rss/'),
|
||||
('ServerNews', 'http://servernews.ru/rss'),
|
||||
]
|
||||
|
@ -9,6 +9,7 @@ elargentino.com
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
class SieteDias(BasicNewsRecipe):
|
||||
title = '7 dias'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -27,15 +28,7 @@ class SieteDias(BasicNewsRecipe):
|
||||
INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
|
||||
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})]
|
||||
|
||||
remove_tags = [dict(name='link')]
|
||||
|
||||
@ -50,19 +43,22 @@ class SieteDias(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||
soup.head.insert(0,mlang)
|
||||
soup.head.insert(1,mcharset)
|
||||
soup.html['dir'] = self.direction
|
||||
mlang = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
mcharset = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
|
||||
soup.head.insert(0, mlang)
|
||||
soup.head.insert(1, mcharset)
|
||||
return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
cover_item = soup.find('div',attrs={'class':'colder'})
|
||||
cover_item = soup.find('div', attrs={'class': 'colder'})
|
||||
if cover_item:
|
||||
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
||||
clean_url = self.image_url_processor(
|
||||
None, cover_item.div.img['src'])
|
||||
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
||||
return cover_url
|
||||
|
||||
|
@ -9,6 +9,7 @@ sapteseri.ro
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class SapteSeri(BasicNewsRecipe):
|
||||
title = u'Sapte Seri'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
@ -26,25 +27,21 @@ class SapteSeri(BasicNewsRecipe):
|
||||
cover_url = 'http://www.sapteseri.ro/Images/logo.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'id':'title'})
|
||||
, dict(name='div', attrs={'class':'mt10 mb10'})
|
||||
, dict(name='div', attrs={'class':'mb20 mt10'})
|
||||
, dict(name='div', attrs={'class':'mt5 mb20'})
|
||||
dict(name='h1', attrs={'id': 'title'}), dict(name='div', attrs={'class': 'mt10 mb10'}), dict(
|
||||
name='div', attrs={'class': 'mb20 mt10'}), dict(name='div', attrs={'class': 'mt5 mb20'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['entityimgworking']})
|
||||
dict(name='div', attrs={'id': ['entityimgworking']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Ce se intampla azi in Bucuresti', u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/')
|
||||
(u'Ce se intampla azi in Bucuresti',
|
||||
u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -9,6 +9,7 @@ http://www.ansa.it/
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Ansa(BasicNewsRecipe):
|
||||
__author__ = 'Gabriele Marini'
|
||||
description = 'Italian News Agency'
|
||||
@ -28,19 +29,17 @@ class Ansa(BasicNewsRecipe):
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
conversion_options = {'linearize_tables':True}
|
||||
conversion_options = {'linearize_tables': True}
|
||||
remove_attributes = ['colspan']
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['path','header-content','corpo']}),
|
||||
keep_only_tags = [dict(name='div', attrs={'class': ['path', 'header-content', 'corpo']}),
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'tools-bar'}),
|
||||
dict(name='div', attrs={'id':['rssdiv','blocco']})
|
||||
dict(name='div', attrs={'class': 'tools-bar'}),
|
||||
dict(name='div', attrs={'id': ['rssdiv', 'blocco']})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'HomePage', u'http://www.ansa.it/web/ansait_web_rss_homepage.xml'),
|
||||
(u'Top New', u'http://www.ansa.it/web/notizie/rubriche/topnews/topnews_rss.xml'),
|
||||
@ -50,9 +49,11 @@ class Ansa(BasicNewsRecipe):
|
||||
(u'Politica', u'http://www.ansa.it/web/notizie/rubriche/politica/politica_rss.xml'),
|
||||
(u'Scienze', u'http://www.ansa.it/web/notizie/rubriche/scienza/scienza_rss.xml'),
|
||||
(u'Cinema', u'http://www.ansa.it/web/notizie/rubriche/cinema/cinema_rss.xml'),
|
||||
(u'Tecnologia e Internet', u'http://www.ansa.it/web/notizie/rubriche/tecnologia/tecnologia_rss.xml'),
|
||||
(u'Tecnologia e Internet',
|
||||
u'http://www.ansa.it/web/notizie/rubriche/tecnologia/tecnologia_rss.xml'),
|
||||
(u'Spettacolo', u'http://www.ansa.it/web/notizie/rubriche/spettacolo/spettacolo_rss.xml'),
|
||||
(u'Cultura e Tendenze', u'http://www.ansa.it/web/notizie/rubriche/cultura/cultura_rss.xml'),
|
||||
(u'Cultura e Tendenze',
|
||||
u'http://www.ansa.it/web/notizie/rubriche/cultura/cultura_rss.xml'),
|
||||
(u'Sport', u'http://www.ansa.it/web/notizie/rubriche/altrisport/altrisport_rss.xml'),
|
||||
(u'Calcio', u'http://www.ansa.it/web/notizie/rubriche/calcio/calcio_rss.xml'),
|
||||
(u'Lazio', u'http://www.ansa.it/web/notizie/regioni/lazio/lazio_rss.xml'),
|
||||
|
@ -1,6 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class DrawAndCook(BasicNewsRecipe):
|
||||
title = 'DrawAndCook'
|
||||
__author__ = 'Starson17'
|
||||
@ -10,7 +11,7 @@ class DrawAndCook(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
publisher = 'Starson17'
|
||||
category = 'news, food, recipes'
|
||||
use_embedded_content= False
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
oldest_article = 24
|
||||
remove_javascript = True
|
||||
@ -38,21 +39,23 @@ class DrawAndCook(BasicNewsRecipe):
|
||||
date = ''
|
||||
current_articles = []
|
||||
soup = self.index_to_soup(url)
|
||||
featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'})
|
||||
recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
|
||||
featured_major_slider = soup.find(
|
||||
name='div', attrs={'id': 'featured_major_slider'})
|
||||
recipes = featured_major_slider.findAll(
|
||||
'li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
|
||||
for recipe in recipes:
|
||||
page_url = self.INDEX + recipe.a['href']
|
||||
print 'page_url is: ', page_url
|
||||
title = recipe.find('strong').string
|
||||
print 'title is: ', title
|
||||
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date})
|
||||
current_articles.append(
|
||||
{'title': title, 'url': page_url, 'description': '', 'date': date})
|
||||
return current_articles
|
||||
|
||||
keep_only_tags = [dict(name='h1', attrs={'id':'page_title'})
|
||||
,dict(name='section', attrs={'id':'artwork'})
|
||||
keep_only_tags = [dict(name='h1', attrs={'id': 'page_title'}), dict(name='section', attrs={'id': 'artwork'})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='article', attrs={'id':['recipe_actions', 'metadata']})
|
||||
remove_tags = [dict(name='article', attrs={'id': ['recipe_actions', 'metadata']})
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
@ -62,4 +65,3 @@ class DrawAndCook(BasicNewsRecipe):
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
|
@ -2,7 +2,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
|
||||
class ZiveRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Abelturd'
|
||||
@ -25,21 +24,20 @@ class ZiveRecipe(BasicNewsRecipe):
|
||||
cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
|
||||
|
||||
feeds = []
|
||||
feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'))
|
||||
feeds.append((u'V\u0161etky \u010dl\xe1nky',
|
||||
u'http://www.zive.sk/rss/sc-47/default.aspx'))
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL|re.IGNORECASE),
|
||||
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
|
||||
]
|
||||
|
||||
|
||||
remove_tags = []
|
||||
|
||||
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),]
|
||||
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={
|
||||
'class': 'arlist-data-info-author'}), dict(name='div', attrs={'class': 'bbtext font-resizer-area'}), ]
|
||||
extra_css = '''
|
||||
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
|
||||
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
|
||||
'''
|
||||
|
||||
|
||||
|
@ -1,8 +1,10 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
|
||||
title = u'Aachener Nachrichten'
|
||||
__author__ = 'schuster' #AGE update 2012-11-28
|
||||
__author__ = 'schuster' # AGE update 2012-11-28
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -14,58 +16,98 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.aachener-nachrichten.de/img/logos/an_website_retina.png'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='article', attrs={'class':['single']})
|
||||
dict(name='article', attrs={'class': ['single']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':["clearfix navi-wrapper"]}),
|
||||
dict(name='div', attrs={'id':["article_actions"]}),
|
||||
dict(name='style', attrs={'type':["text/css"]}),
|
||||
dict(name='div', attrs={'class': ["clearfix navi-wrapper"]}),
|
||||
dict(name='div', attrs={'id': ["article_actions"]}),
|
||||
dict(name='style', attrs={'type': ["text/css"]}),
|
||||
dict(name='aside'),
|
||||
dict(name='a', attrs={'class':["btn btn-action"]})
|
||||
dict(name='a', attrs={'class': ["btn btn-action"]})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Lokales - Euregio', u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
|
||||
(u'Lokales - Aachen', u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
|
||||
(u'Lokales - Nordkreis', u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
|
||||
(u'Lokales - Düren', u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
|
||||
(u'Lokales - Eiffel', u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
|
||||
(u'Lokales - Eschweiler', u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
|
||||
(u'Lokales - Geilenkirchen', u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
|
||||
(u'Lokales - Heinsberg', u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
|
||||
(u'Lokales - Jülich', u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
|
||||
(u'Lokales - Stolberg', u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
|
||||
(u'News - Politik', u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
|
||||
(u'News - Aus aller Welt', u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
|
||||
(u'News - Wirtschaft', u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
|
||||
(u'News - Kultur', u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
|
||||
(u'Lokales - Euregio',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/euregio-rss-1.357285'),
|
||||
(u'Lokales - Aachen',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/aachen-rss-1.357286'),
|
||||
(u'Lokales - Nordkreis',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/nordkreis-rss-1.358150'),
|
||||
(u'Lokales - Düren',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/dueren-rss-1.358626'),
|
||||
(u'Lokales - Eiffel',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/eifel-rss-1.358978'),
|
||||
(u'Lokales - Eschweiler',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/eschweiler-rss-1.359332'),
|
||||
(u'Lokales - Geilenkirchen',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/geilenkirchen-rss-1.359643'),
|
||||
(u'Lokales - Heinsberg',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/heinsberg-rss-1.359724'),
|
||||
(u'Lokales - Jülich',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/juelich-rss-1.359725'),
|
||||
(u'Lokales - Stolberg',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/stolberg-rss-1.359726'),
|
||||
(u'News - Politik',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/politik-rss-1.359727'),
|
||||
(u'News - Aus aller Welt',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/ausallerwelt-rss-1.453282'),
|
||||
(u'News - Wirtschaft',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/wirtschaft-rss-1.359872'),
|
||||
(u'News - Kultur',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/kultur-rss-1.365018'),
|
||||
(u'News - Kino', u'http://www.aachener-nachrichten.de/cmlink/kino-rss-1.365019'),
|
||||
(u'News - Digital', u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
|
||||
(u'News - Wissenschaft', u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
|
||||
(u'News - Hochschule', u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
|
||||
(u'News - Digital',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/digital-rss-1.365020'),
|
||||
(u'News - Wissenschaft',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/wissenschaft-rss-1.365021'),
|
||||
(u'News - Hochschule',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/hochschule-rss-1.365022'),
|
||||
(u'News - Auto', u'http://www.aachener-nachrichten.de/cmlink/auto-rss-1.365023'),
|
||||
(u'News - Kurioses', u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
|
||||
(u'News - Musik', u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
|
||||
(u'News - Tagesthema', u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
|
||||
(u'News - Newsticker', u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
|
||||
(u'Sport - Aktuell', u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
|
||||
(u'Sport - Fußball', u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
|
||||
(u'Sport - Bundesliga', u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
|
||||
(u'Sport - Alemannia Aachen', u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
|
||||
(u'Sport - Volleyball', u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
|
||||
(u'Sport - Chio', u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
|
||||
(u'Dossier - Kinderuni', u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
|
||||
(u'Dossier - Karlspreis', u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
|
||||
(u'Dossier - Ritterorden', u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
|
||||
(u'Dossier - ZAB-Aachen', u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
|
||||
(u'Dossier - Karneval', u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
|
||||
(u'Ratgeber - Geld', u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
|
||||
(u'Ratgeber - Recht', u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
|
||||
(u'Ratgeber - Gesundheit', u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
|
||||
(u'Ratgeber - Familie', u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
|
||||
(u'Ratgeber - Livestyle', u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
|
||||
(u'Ratgeber - Reisen', u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
|
||||
(u'Ratgeber - Bauen und Wohnen', u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
|
||||
(u'Ratgeber - Bildung und Beruf', u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
|
||||
(u'News - Kurioses',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/kurioses-rss-1.365067'),
|
||||
(u'News - Musik',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/musik-rss-1.365305'),
|
||||
(u'News - Tagesthema',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/tagesthema-rss-1.365519'),
|
||||
(u'News - Newsticker',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/newsticker-rss-1.451948'),
|
||||
(u'Sport - Aktuell',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/aktuell-rss-1.366716'),
|
||||
(u'Sport - Fußball',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/fussball-rss-1.367060'),
|
||||
(u'Sport - Bundesliga',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/bundesliga-rss-1.453367'),
|
||||
(u'Sport - Alemannia Aachen',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/alemanniaaachen-rss-1.366057'),
|
||||
(u'Sport - Volleyball',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/volleyball-rss-1.453370'),
|
||||
(u'Sport - Chio',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/chio-rss-1.453371'),
|
||||
(u'Dossier - Kinderuni',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/kinderuni-rss-1.453375'),
|
||||
(u'Dossier - Karlspreis',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/karlspreis-rss-1.453376'),
|
||||
(u'Dossier - Ritterorden',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/ritterorden-rss-1.453377'),
|
||||
(u'Dossier - ZAB-Aachen',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/zabaachen-rss-1.453380'),
|
||||
(u'Dossier - Karneval',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/karneval-rss-1.453384'),
|
||||
(u'Ratgeber - Geld',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/geld-rss-1.453385'),
|
||||
(u'Ratgeber - Recht',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/recht-rss-1.453386'),
|
||||
(u'Ratgeber - Gesundheit',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/gesundheit-rss-1.453387'),
|
||||
(u'Ratgeber - Familie',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/familie-rss-1.453388'),
|
||||
(u'Ratgeber - Livestyle',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/lifestyle-rss-1.453389'),
|
||||
(u'Ratgeber - Reisen',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/reisen-rss-1.453390'),
|
||||
(u'Ratgeber - Bauen und Wohnen',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/bauen-rss-1.453398'),
|
||||
(u'Ratgeber - Bildung und Beruf',
|
||||
u'http://www.aachener-nachrichten.de/cmlink/bildung-rss-1.453400'),
|
||||
]
|
||||
|
@ -1,39 +1,41 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ABCRecipe(BasicNewsRecipe):
|
||||
title = u'ABC Linuxu'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 3#5
|
||||
max_articles_per_feed = 3 # 5
|
||||
__author__ = 'Funthomas'
|
||||
language = 'cs'
|
||||
|
||||
feeds = [
|
||||
#(u'Blogy', u'http://www.abclinuxu.cz/auto/blogDigest.rss'),
|
||||
# (u'Blogy', u'http://www.abclinuxu.cz/auto/blogDigest.rss'),
|
||||
(u'Články', u'http://www.abclinuxu.cz/auto/abc.rss'),
|
||||
(u'Zprávičky','http://www.abclinuxu.cz/auto/zpravicky.rss')
|
||||
(u'Zprávičky', 'http://www.abclinuxu.cz/auto/zpravicky.rss')
|
||||
]
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
remove_attributes = ['width','height']
|
||||
remove_attributes = ['width', 'height']
|
||||
|
||||
remove_tags_before = dict(name='h1')
|
||||
remove_tags = [
|
||||
dict(attrs={'class':['meta-vypis','page_tools','cl_perex']}),
|
||||
dict(attrs={'class':['cl_nadpis-link','komix-nav']})
|
||||
dict(attrs={'class': ['meta-vypis', 'page_tools', 'cl_perex']}),
|
||||
dict(attrs={'class': ['cl_nadpis-link', 'komix-nav']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div',attrs={'class':['cl_perex','komix-nav']}),
|
||||
dict(attrs={'class':['meta-vypis','page_tools']}),
|
||||
dict(name='',attrs={'':''}),
|
||||
dict(name='div', attrs={'class': ['cl_perex', 'komix-nav']}),
|
||||
dict(attrs={'class': ['meta-vypis', 'page_tools']}),
|
||||
dict(name='', attrs={'': ''}),
|
||||
]
|
||||
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'</div>.*<p class="perex">', re.DOTALL),lambda match: '</div><p class="perex">')
|
||||
(re.compile(r'</div>.*<p class="perex">', re.DOTALL),
|
||||
lambda match: '</div><p class="perex">')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?varianta=print&noDiz'
|
||||
|
||||
|
@ -6,6 +6,7 @@ abc.net.au/news
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class ABCNews(BasicNewsRecipe):
|
||||
title = 'ABC News'
|
||||
__author__ = 'Pat Stapleton, Dean Cording'
|
||||
@ -16,7 +17,6 @@ class ABCNews(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
publisher = 'ABC News'
|
||||
@ -24,23 +24,21 @@ class ABCNews(BasicNewsRecipe):
|
||||
language = 'en_AU'
|
||||
publication_type = 'newsportal'
|
||||
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
|
||||
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
|
||||
# Remove annoying map links (inline-caption class is also used for some
|
||||
# image captions! hence regex to match maps.google)
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': False
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['article section']})]
|
||||
keep_only_tags = [dict(attrs={'class': ['article section']})]
|
||||
|
||||
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
|
||||
remove_tags = [dict(attrs={'class': ['related', 'tags', 'tools', 'attached-content ready',
|
||||
'inline-content story left', 'inline-content map left contracted', 'published',
|
||||
'story-map', 'statepromo', 'topics', ]})]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
remove_attributes = ['width', 'height']
|
||||
|
||||
feeds = [
|
||||
('Top Stories', 'http://www.abc.net.au/news/feed/45910/rss.xml'),
|
||||
@ -52,5 +50,6 @@ class ABCNews(BasicNewsRecipe):
|
||||
('Australia', 'http://www.abc.net.au/news/feed/46182/rss.xml'),
|
||||
('World', 'http://www.abc.net.au/news/feed/52278/rss.xml'),
|
||||
('Business', 'http://www.abc.net.au/news/feed/51892/rss.xml'),
|
||||
('Science and Technology', 'http://www.abc.net.au/news/feed/2298/rss.xml'),
|
||||
('Science and Technology',
|
||||
'http://www.abc.net.au/news/feed/2298/rss.xml'),
|
||||
]
|
||||
|
@ -10,6 +10,7 @@ http://www.abc.es/
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
||||
|
||||
title = u'ABC.es'
|
||||
@ -39,19 +40,20 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
feeds = [
|
||||
(u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml')
|
||||
,(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml')
|
||||
,(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml')
|
||||
,(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml')
|
||||
,(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml')
|
||||
,(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml')
|
||||
,(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml')
|
||||
,(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml')
|
||||
,(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml')
|
||||
,(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml')
|
||||
,(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml')
|
||||
,(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml')
|
||||
,(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml')
|
||||
,(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml')
|
||||
,(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
|
||||
|
||||
(u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml'),
|
||||
(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml'),
|
||||
(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml'),
|
||||
(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml'),
|
||||
(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml'),
|
||||
(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml'),
|
||||
(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml'),
|
||||
(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml'),
|
||||
(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml'),
|
||||
(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml'),
|
||||
(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml'),
|
||||
(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml'),
|
||||
(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml'),
|
||||
(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml'),
|
||||
(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
|
||||
]
|
||||
|
@ -6,6 +6,7 @@ abc.com.py
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ABC_py(BasicNewsRecipe):
|
||||
title = 'ABC Color'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -27,25 +28,23 @@ class ABC_py(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['form','iframe','embed','object','link','base','table']),
|
||||
dict(attrs={'class':['es-carousel-wrapper']}),
|
||||
dict(attrs={'id':['tools','article-banner-1']})
|
||||
dict(name=['form', 'iframe', 'embed',
|
||||
'object', 'link', 'base', 'table']),
|
||||
dict(attrs={'class': ['es-carousel-wrapper']}),
|
||||
dict(attrs={'id': ['tools', 'article-banner-1']})
|
||||
]
|
||||
keep_only_tags = [dict(attrs={'id':'article'})]
|
||||
|
||||
keep_only_tags = [dict(attrs={'id': 'article'})]
|
||||
|
||||
feeds = [
|
||||
(u'Ultimo momento', u'http://www.abc.com.py/rss.xml' )
|
||||
,(u'Nacionales' , u'http://www.abc.com.py/nacionales/rss.xml' )
|
||||
,(u'Mundo' , u'http://www.abc.com.py/internacionales/rss.xml')
|
||||
,(u'Deportes' , u'http://www.abc.com.py/deportes/rss.xml' )
|
||||
,(u'Espectaculos' , u'http://www.abc.com.py/espectaculos/rss.xml' )
|
||||
,(u'TecnoCiencia' , u'http://www.abc.com.py/ciencia/rss.xml' )
|
||||
|
||||
(u'Ultimo momento', u'http://www.abc.com.py/rss.xml'),
|
||||
(u'Nacionales', u'http://www.abc.com.py/nacionales/rss.xml'),
|
||||
(u'Mundo', u'http://www.abc.com.py/internacionales/rss.xml'),
|
||||
(u'Deportes', u'http://www.abc.com.py/deportes/rss.xml'),
|
||||
(u'Espectaculos', u'http://www.abc.com.py/espectaculos/rss.xml'),
|
||||
(u'TecnoCiencia', u'http://www.abc.com.py/ciencia/rss.xml')
|
||||
]
|
||||
|
@ -8,6 +8,7 @@ www.accountancyage.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AccountancyAge(BasicNewsRecipe):
|
||||
title = 'Accountancy Age'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -23,11 +24,12 @@ class AccountancyAge(BasicNewsRecipe):
|
||||
lang = 'en'
|
||||
language = 'en'
|
||||
|
||||
feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
|
||||
feeds = [
|
||||
(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(attrs={'class':'article_content'}),
|
||||
dict(attrs={'class': 'article_content'}),
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1334868409(BasicNewsRecipe):
|
||||
title = u'AÇIK BİLİM DERGİSİ'
|
||||
description = ' Aylık çevrimiçi bilim dergisi'
|
||||
@ -15,13 +16,9 @@ class AdvancedUserRecipe1334868409(BasicNewsRecipe):
|
||||
language = 'tr'
|
||||
publication_type = 'magazine '
|
||||
conversion_options = {
|
||||
'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
||||
}
|
||||
cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
||||
masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
|
||||
|
||||
|
||||
feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]
|
||||
|
@ -10,6 +10,7 @@ acrimed.org
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Acrimed(BasicNewsRecipe):
|
||||
title = u'Acrimed'
|
||||
__author__ = 'Gaëtan Lehmann'
|
||||
@ -22,7 +23,8 @@ class Acrimed(BasicNewsRecipe):
|
||||
feeds = [(u'Acrimed', u'http://www.acrimed.org/spip.php?page=backend')]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
|
||||
(re.compile(r'<title>(.*) - Acrimed \| Action Critique M.*dias</title>'),
|
||||
lambda m: '<title>' + m.group(1) + '</title>'),
|
||||
(re.compile(r'<h2>(.*) - Acrimed \| Action Critique M.*dias</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>')]
|
||||
|
||||
extra_css = """
|
||||
|
@ -1,6 +1,7 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ADRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -22,41 +23,54 @@ class ADRecipe(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'art_box2'}))
|
||||
keep_only_tags.append(dict(name = 'p', attrs = {'class': 'gen_footnote3'}))
|
||||
keep_only_tags.append(dict(name='div', attrs={'id': 'art_box2'}))
|
||||
keep_only_tags.append(dict(name='p', attrs={'class': 'gen_footnote3'}))
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class': 'gen_clear'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')}))
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'gen_clear'}))
|
||||
remove_tags.append(
|
||||
dict(name='div', attrs={'class': re.compile(r'gen_spacer.*')}))
|
||||
|
||||
remove_attributes = ['style']
|
||||
|
||||
# feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
|
||||
# feeds from
|
||||
# http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
|
||||
feeds = []
|
||||
feeds.append((u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
|
||||
feeds.append((u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
|
||||
feeds.append(
|
||||
(u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
|
||||
feeds.append(
|
||||
(u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
|
||||
feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
|
||||
feeds.append((u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
|
||||
feeds.append((u'Gezondheid & Wetenschap',
|
||||
u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
|
||||
feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
|
||||
feeds.append((u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
|
||||
feeds.append((u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
|
||||
feeds.append((u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
|
||||
feeds.append((u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
|
||||
feeds.append((u'Nederlands Voetbal',
|
||||
u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
|
||||
feeds.append((u'Buitenlands Voetbal',
|
||||
u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
|
||||
feeds.append((u'Champions League/Europa League',
|
||||
u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
|
||||
feeds.append(
|
||||
(u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
|
||||
feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
|
||||
feeds.append((u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
|
||||
feeds.append((u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
|
||||
feeds.append(
|
||||
(u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
|
||||
feeds.append(
|
||||
(u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
|
||||
feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
|
||||
feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
|
||||
feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
|
||||
feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
|
||||
feeds.append((u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
|
||||
feeds.append((u'Kunst & Literatuur',
|
||||
u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
|
||||
feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
|
||||
feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
|
||||
feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
|
||||
feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
|
||||
feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
|
||||
feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
|
||||
feeds.append((u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
|
||||
feeds.append(
|
||||
(u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
|
||||
feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
|
||||
|
||||
extra_css = '''
|
||||
@ -71,7 +85,8 @@ class ADRecipe(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
parts = url.split('/')
|
||||
print_url = 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
|
||||
+ parts[10] + '/' + parts[7] + '/print/' + parts[8] + '/' + parts[9] + '/' + parts[13]
|
||||
+ parts[10] + '/' + parts[7] + '/print/' + \
|
||||
parts[8] + '/' + parts[9] + '/' + parts[13]
|
||||
|
||||
return print_url
|
||||
|
||||
|
@ -9,6 +9,7 @@ adevarul.ro
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Adevarul(BasicNewsRecipe):
|
||||
title = u'Adev\u0103rul'
|
||||
language = 'ro'
|
||||
@ -25,35 +26,21 @@ class Adevarul(BasicNewsRecipe):
|
||||
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
|
||||
,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'})
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'article_header'}), dict(name='div', attrs={'class': 'bb-tu first-t bb-article-body'})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='li', attrs={'class':'author'})
|
||||
,dict(name='li', attrs={'class':'date'})
|
||||
,dict(name='li', attrs={'class':'comments'})
|
||||
,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
|
||||
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
|
||||
,dict(name='form', attrs={'id':'bb-comment-create-form'})
|
||||
,dict(name='div', attrs={'id':'mediatag'})
|
||||
,dict(name='div', attrs={'id':'ft'})
|
||||
,dict(name='div', attrs={'id':'comment_wrapper'})
|
||||
dict(name='li', attrs={'class': 'author'}), dict(name='li', attrs={'class': 'date'}), dict(name='li', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}), dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}), dict(name='form', attrs={'id': 'bb-comment-create-form'}), dict(name='div', attrs={'id': 'mediatag'}), dict(name='div', attrs={'id': 'ft'}), dict(name='div', attrs={'id': 'comment_wrapper'}) # noqa
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'comment_wrapper'}),
|
||||
dict(name='div', attrs={'id': 'comment_wrapper'}),
|
||||
]
|
||||
|
||||
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
|
||||
feeds = [(u'\u0218tiri', u'http://www.adevarul.ro/rss/latest')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
@ -10,6 +10,7 @@ http://www.adnkronos.com/
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Adnkronos(BasicNewsRecipe):
|
||||
__author__ = 'Gabriele Marini'
|
||||
description = 'News agency'
|
||||
@ -27,20 +28,19 @@ class Adnkronos(BasicNewsRecipe):
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = article.get('id', article.get('guid', None))
|
||||
return link
|
||||
|
||||
extra_css = ' .newsAbstract{font-style: italic} '
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['breadCrumbs','newsTop','newsText']})
|
||||
keep_only_tags = [dict(name='div', attrs={'class': ['breadCrumbs', 'newsTop', 'newsText']})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['leogoo','leogoo2']})
|
||||
dict(name='div', attrs={'class': ['leogoo', 'leogoo2']})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Prima Pagina', u'http://rss.adnkronos.com/RSS_PrimaPagina.xml'),
|
||||
(u'Ultima Ora', u'http://rss.adnkronos.com/RSS_Ultimora.xml'),
|
||||
@ -56,4 +56,3 @@ class Adnkronos(BasicNewsRecipe):
|
||||
(u'Sostenibilita', u'http://rss.adnkronos.com/RSS_Sostenibilita.xml'),
|
||||
(u'Salute', u'http://rss.adnkronos.com/RSS_Salute.xml')
|
||||
]
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
|
||||
title = u'Ads of the World'
|
||||
oldest_article = 7
|
||||
@ -11,16 +12,15 @@ class AdvancedUserRecipe1336986047(BasicNewsRecipe):
|
||||
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'primary'})
|
||||
dict(name='div', attrs={'id': 'primary'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'class':'links inline'})
|
||||
,dict(name='div', attrs={'class':'form-item'})
|
||||
,dict(name='div', attrs={'id':['options', 'comments']})
|
||||
,dict(name='ul', attrs={'id':'nodePager'})
|
||||
dict(name='ul', attrs={'class': 'links inline'}), dict(name='div', attrs={'class': 'form-item'}), dict(
|
||||
name='div', attrs={'id': ['options', 'comments']}), dict(name='ul', attrs={'id': 'nodePager'})
|
||||
]
|
||||
|
||||
reverse_article_order = True
|
||||
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
|
||||
feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
|
||||
feeds = [
|
||||
(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
|
||||
|
@ -1,8 +1,10 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Adventure_zone(BasicNewsRecipe):
|
||||
title = u'Adventure Zone'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
|
||||
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' # noqa
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
BASEURL = 'http://www.adventure-zone.info/fusion/'
|
||||
@ -13,20 +15,20 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
|
||||
remove_attributes = ['style']
|
||||
use_embedded_content = False
|
||||
keep_only_tags = [dict(attrs={'class':'content'})]
|
||||
remove_tags = [dict(attrs={'class':'footer'})]
|
||||
keep_only_tags = [dict(attrs={'class': 'content'})]
|
||||
remove_tags = [dict(attrs={'class': 'footer'})]
|
||||
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
skip_tag = soup.body.find(attrs={'class':'content'})
|
||||
skip_tag = soup.body.find(attrs={'class': 'content'})
|
||||
skip_tag = skip_tag.findAll(name='a')
|
||||
title = soup.title.string.lower()
|
||||
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
|
||||
for r in skip_tag:
|
||||
if r.strong and r.strong.string:
|
||||
word=r.strong.string.lower()
|
||||
word = r.strong.string.lower()
|
||||
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
||||
return self.index_to_soup(self.BASEURL+r['href'], raw=True)
|
||||
return self.index_to_soup(self.BASEURL + r['href'], raw=True)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for link in soup.findAll('a', href=True):
|
||||
|
@ -6,6 +6,7 @@ www.adventuregamers.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdventureGamers(BasicNewsRecipe):
|
||||
title = u'Adventure Gamers'
|
||||
language = 'en'
|
||||
@ -14,7 +15,6 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
publisher = 'Adventure Gamers'
|
||||
category = 'news, games, adventure, technology'
|
||||
oldest_article = 10
|
||||
#delay = 10
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
@ -35,20 +35,16 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'cleft_inn'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'cleft_inn'})]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','embed','form','iframe','meta'])
|
||||
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/scoring'})
|
||||
,dict(name='a', attrs={'href':'http://www.adventuregamers.com/about/policies'})
|
||||
dict(name=['object', 'link', 'embed', 'form', 'iframe', 'meta']), dict(name='a', attrs={
|
||||
'href': 'http://www.adventuregamers.com/about/scoring'}), dict(name='a', attrs={'href': 'http://www.adventuregamers.com/about/policies'})
|
||||
]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'bodytext'})]
|
||||
remove_attributes = ['width','height']
|
||||
remove_tags_after = [dict(name='div', attrs={'class': 'bodytext'})]
|
||||
remove_attributes = ['width', 'height']
|
||||
|
||||
feeds = [(u'Articles', u'http://www.adventuregamers.com/rss/')]
|
||||
|
||||
@ -59,29 +55,28 @@ class AdventureGamers(BasicNewsRecipe):
|
||||
return url
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('div', attrs={'class':'pagination_big'})
|
||||
pager = soup.find('div', attrs={'class': 'pagination_big'})
|
||||
if pager:
|
||||
nextpage = soup.find('a', attrs={'class':'next-page'})
|
||||
nextpage = soup.find('a', attrs={'class': 'next-page'})
|
||||
if nextpage:
|
||||
nexturl = nextpage['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'class':'bodytext'})
|
||||
texttag = soup2.find('div', attrs={'class': 'bodytext'})
|
||||
for it in texttag.findAll(style=True):
|
||||
del it['style']
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
self.append_page(soup2, texttag, newpos)
|
||||
texttag.extract()
|
||||
pager.extract()
|
||||
appendtag.insert(position,texttag)
|
||||
|
||||
appendtag.insert(position, texttag)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('div', attrs={'class':'floatright'}):
|
||||
for item in soup.findAll('div', attrs={'class': 'floatright'}):
|
||||
item.extract()
|
||||
self.append_page(soup, soup.body, 3)
|
||||
pager = soup.find('div',attrs={'class':'pagination_big'})
|
||||
pager = soup.find('div', attrs={'class': 'pagination_big'})
|
||||
if pager:
|
||||
pager.extract()
|
||||
return self.adeify_images(soup)
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Aftenposten(BasicNewsRecipe):
|
||||
title = u'Aftenposten'
|
||||
__author__ = 'davotibarna'
|
||||
@ -17,4 +18,3 @@ class Aftenposten(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('#xtor=RSS-3', '?service=print')
|
||||
|
||||
|
@ -8,6 +8,7 @@ boljevac.blogspot.com
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AgroGerila(BasicNewsRecipe):
|
||||
title = 'Agro Gerila'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -19,13 +20,10 @@ class AgroGerila(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
publication_type = 'blog'
|
||||
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
|
||||
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' # noqa
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : 'film, blog, srbija'
|
||||
, 'publisher': 'Dry-Na-Nord'
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': 'film, blog, srbija', 'publisher': 'Dry-Na-Nord', 'language': language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
@ -36,5 +34,3 @@ class AgroGerila(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
||||
|
@ -6,6 +6,7 @@ www.aif.ru
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AIF_ru(BasicNewsRecipe):
|
||||
title = 'Arguments & Facts - Russian'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -25,16 +26,12 @@ class AIF_ru(BasicNewsRecipe):
|
||||
img{display: block}
|
||||
"""
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'title'})
|
||||
,dict(name='div', attrs={'class':'prew_tags'})
|
||||
,dict(name='article', attrs={'class':lambda x: x and 'articl_body' in x.split()})
|
||||
dict(name='h1', attrs={'class': 'title'}), dict(name='div', attrs={'class': 'prew_tags'}), dict(
|
||||
name='article', attrs={'class': lambda x: x and 'articl_body' in x.split()})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['iframe','object','link','base','input','meta'])
|
||||
,dict(name='div',attrs={'class':'in-topic'})
|
||||
,dict(name='div', attrs={'class':lambda x: x and 'related_article' in x.split()})
|
||||
,dict(name='div', attrs={'class':lambda x: x and 'articl_tag' in x.split()})
|
||||
dict(name=['iframe', 'object', 'link', 'base', 'input', 'meta']), dict(name='div', attrs={'class': 'in-topic'}), dict(name='div', attrs={
|
||||
'class': lambda x: x and 'related_article' in x.split()}), dict(name='div', attrs={'class': lambda x: x and 'articl_tag' in x.split()})
|
||||
]
|
||||
|
||||
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AirForceTimes(BasicNewsRecipe):
|
||||
title = 'Air Force Times'
|
||||
__author__ = 'jde'
|
||||
@ -12,7 +13,7 @@ class AirForceTimes(BasicNewsRecipe):
|
||||
tags = 'news, U.S. Air Force'
|
||||
cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
||||
masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
|
||||
oldest_article = 7 #days
|
||||
oldest_article = 7 # days
|
||||
max_articles_per_feed = 25
|
||||
publication_type = 'newspaper'
|
||||
no_stylesheets = True
|
||||
@ -24,8 +25,6 @@ class AirForceTimes(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
|
||||
('News', 'http://www.airforcetimes.com/rss_news.php'),
|
||||
@ -37,7 +36,3 @@ class AirForceTimes(BasicNewsRecipe):
|
||||
('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
|
||||
('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -6,10 +6,12 @@ __version__ = '0.1'
|
||||
__date__ = '2015/01/10'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import datetime, re
|
||||
import datetime
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
now = datetime.datetime.now()
|
||||
title = 'The AJC'
|
||||
@ -24,13 +26,15 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
||||
# The AJC lists identical articles in multiple feeds; this removes them based on their URL
|
||||
# The AJC lists identical articles in multiple feeds; this removes them
|
||||
# based on their URL
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
# And this says "Hey, AJC, different feeds should mean something!"
|
||||
remove_empty_feeds = True
|
||||
|
||||
# Sets whether a feed has full articles embedded in it. The AJC feeds do not.
|
||||
# Sets whether a feed has full articles embedded in it. The AJC feeds do
|
||||
# not.
|
||||
use_embedded_content = False
|
||||
|
||||
masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'
|
||||
@ -39,7 +43,8 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
# articles will be dropped.
|
||||
feeds = [
|
||||
('Breaking News', 'http://www.ajc.com/list/rss/online/ajc-auto-list-iphone-topnews/aFKq/'),
|
||||
('Metro and Georgia', 'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'),
|
||||
('Metro and Georgia',
|
||||
'http://www.ajc.com/list/rss/news/local/news-georgia-and-region/aCxP/'),
|
||||
('Business', 'http://www.ajc.com/feeds/categories/business/'),
|
||||
('Health', 'http://www.ajc.com/feeds/categories/health/'),
|
||||
# ('Braves', 'http://www.ajc.com/list/rss/sports/baseball/atlanta-braves-news/aGpN/'),
|
||||
@ -52,19 +57,23 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
author_reg_exp = '^.*cm-story-author.*$'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':re.compile(headline_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':'cm-story-meta'}),
|
||||
dict(name='div', attrs={'class':re.compile(author_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='meta', attrs={'name':'description'}),
|
||||
dict(name='div', attrs={'class':re.compile(story_body_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class': re.compile(
|
||||
headline_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class': 'cm-story-meta'}),
|
||||
dict(name='div', attrs={'class': re.compile(
|
||||
author_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='meta', attrs={'name': 'description'}),
|
||||
dict(name='div', attrs={'class': re.compile(
|
||||
story_body_reg_exp, re.IGNORECASE)}),
|
||||
]
|
||||
|
||||
premium_reg_exp = '^.*cmPremiumContent.*$'
|
||||
footer_reg_exp = '^.*cm-story-footer.*$'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':re.compile(footer_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class':'cm-inline-related-group'})
|
||||
dict(name='div', attrs={'class': re.compile(
|
||||
footer_reg_exp, re.IGNORECASE)}),
|
||||
dict(name='div', attrs={'class': 'cm-inline-related-group'})
|
||||
]
|
||||
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
@ -74,22 +83,24 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
.cm-story-author { display: block; font-size: 80%; font-style: italic; }'
|
||||
|
||||
# I would love to remove these completely from the finished product, but I can't see how at the moment.
|
||||
# Returning "None" from preprocess_html(soup) as suggested in mobileread forums leads to errors.
|
||||
# Returning "None" from preprocess_html(soup) as suggested in mobileread
|
||||
# forums leads to errors.
|
||||
def preprocess_html(self, soup):
|
||||
premium = soup.find('div', attrs={'class':re.compile(self.premium_reg_exp, re.IGNORECASE)})
|
||||
premium = soup.find('div', attrs={'class': re.compile(
|
||||
self.premium_reg_exp, re.IGNORECASE)})
|
||||
if premium:
|
||||
return None
|
||||
crosslink = soup.find('a', attrs={'class':'cm-feed-story-more-link'})
|
||||
crosslink = soup.find('a', attrs={'class': 'cm-feed-story-more-link'})
|
||||
if crosslink:
|
||||
return None
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
for meta in soup.findAll('meta', attrs={'name':'description'}):
|
||||
for meta in soup.findAll('meta', attrs={'name': 'description'}):
|
||||
article.text_summary = meta['content']
|
||||
article.summary = meta['content']
|
||||
|
||||
lead = soup.find('div', attrs={'class':'cm-story-photo'})
|
||||
lead = soup.find('div', attrs={'class': 'cm-story-photo'})
|
||||
if lead:
|
||||
lead = lead.find('img')
|
||||
else:
|
||||
@ -98,10 +109,10 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
self.add_toc_thumbnail(article, lead['src'])
|
||||
names = ''
|
||||
comma = ''
|
||||
for div in soup.findAll('div', attrs={'class':re.compile(self.author_reg_exp, re.IGNORECASE)}):
|
||||
for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}):
|
||||
div.extract()
|
||||
for auth in div.findAll('a'):
|
||||
if (auth.has_key('class') and auth['class'] == 'cm-source-image'):
|
||||
if (auth.has_key('class') and auth['class'] == 'cm-source-image'): # noqa
|
||||
continue
|
||||
names = names + comma + auth.contents[0]
|
||||
comma = ', '
|
||||
@ -110,7 +121,6 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
tag = Tag(soup, 'div', [('class', 'cm-story-author')])
|
||||
tag.append("by: ")
|
||||
tag.append(names)
|
||||
meta = soup.find('div', attrs={'class':'cm-story-meta'})
|
||||
meta = soup.find('div', attrs={'class': 'cm-story-meta'})
|
||||
meta_idx = meta.parent.contents.index(meta)
|
||||
meta.parent.insert(meta_idx + 1, tag)
|
||||
|
||||
|
@ -6,6 +6,7 @@ ajiajin.com/blog
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AjiajinBlog(BasicNewsRecipe):
|
||||
title = u'Ajiajin blog'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
@ -19,5 +20,3 @@ class AjiajinBlog(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
|
||||
feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
|
||||
|
||||
|
||||
|
@ -2,16 +2,15 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Aksiyon (BasicNewsRecipe):
|
||||
|
||||
title = u'Aksiyon Dergisi'
|
||||
__author__ = u'thomass'
|
||||
description = 'Haftalık haber dergisi '
|
||||
oldest_article =13
|
||||
max_articles_per_feed =100
|
||||
oldest_article = 13
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
#use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
publisher = 'Aksiyon'
|
||||
category = 'news, haberler,TR,gazete'
|
||||
@ -20,28 +19,34 @@ class Aksiyon (BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
ignore_duplicate_articles = { 'title', 'url' }
|
||||
remove_empty_feeds= True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
remove_empty_feeds = True
|
||||
feeds = [
|
||||
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||
( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
|
||||
( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
|
||||
( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
|
||||
( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
|
||||
( u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
|
||||
( u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
|
||||
( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
|
||||
( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
|
||||
( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
|
||||
( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
|
||||
( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
|
||||
( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
|
||||
( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
|
||||
( u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
|
||||
( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||
( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||
(u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||
(u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
|
||||
(u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
|
||||
(u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
|
||||
(u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
|
||||
(u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
|
||||
(u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
|
||||
(u'ARKA PENCERE',
|
||||
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
|
||||
(u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
|
||||
(u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
|
||||
(u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
|
||||
(u'KÜLTÜR & SANAT',
|
||||
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
|
||||
(u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
|
||||
(u'BİLİŞİM - TEKNOLOJİ',
|
||||
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
|
||||
(u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
|
||||
(u'HAYAT BİLGİSİ',
|
||||
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||
(u'İŞ DÜNYASI',
|
||||
u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
|
||||
]
|
||||
|
||||
#def print_version(self, url):
|
||||
#return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
||||
# def print_version(self, url):
|
||||
# return url.replace(
|
||||
# 'http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&',
|
||||
# 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')
|
||||
|
@ -7,12 +7,13 @@ akter.co.rs
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Akter(BasicNewsRecipe):
|
||||
title = 'AKTER - Nedeljnik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'AKTER - nedeljni politicki magazin savremene Srbije'
|
||||
publisher = 'Akter Media Group d.o.o.'
|
||||
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
|
||||
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics' # noqa
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -29,14 +30,11 @@ class Akter(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'section_to_print'})]
|
||||
feeds = [(u'Nedeljnik', u'http://akter.co.rs/rss/nedeljnik')]
|
||||
|
||||
def print_version(self, url):
|
||||
@ -45,10 +43,9 @@ class Akter(BasicNewsRecipe):
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.akter.co.rs/weekly.html')
|
||||
divt = soup.find('div', attrs={'class':'lastissue'})
|
||||
divt = soup.find('div', attrs={'class': 'lastissue'})
|
||||
if divt:
|
||||
imgt = divt.find('img')
|
||||
if imgt:
|
||||
return 'http://www.akter.co.rs' + imgt['src']
|
||||
return None
|
||||
|
||||
|
@ -7,12 +7,12 @@ akter.co.rs
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Akter(BasicNewsRecipe):
|
||||
title = 'AKTER - Dnevnik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'AKTER - Najnovije vesti iz Srbije'
|
||||
publisher = 'Akter Media Group d.o.o.'
|
||||
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -29,14 +29,11 @@ class Akter(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'section_to_print'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'section_to_print'})]
|
||||
feeds = [(u'Vesti', u'http://akter.co.rs/rss/dnevni')]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class aktualneRecipe(BasicNewsRecipe):
|
||||
__author__ = 'bubak'
|
||||
title = u'aktualne.cz'
|
||||
@ -27,29 +28,31 @@ class aktualneRecipe(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
|
||||
remove_attributes = []
|
||||
remove_tags_before = dict(name='h1', attrs={'class':['titulek-clanku']})
|
||||
remove_tags_before = dict(name='h1', attrs={'class': ['titulek-clanku']})
|
||||
filter_regexps = [r'img.aktualne.centrum.cz']
|
||||
remove_tags = [dict(name='div', attrs={'id':['social-bookmark']}),
|
||||
dict(name='div', attrs={'class':['box1', 'svazane-tagy']}),
|
||||
dict(name='div', attrs={'class':'itemcomment id0'}),
|
||||
dict(name='div', attrs={'class':'hlavicka'}),
|
||||
dict(name='div', attrs={'class':'hlavni-menu'}),
|
||||
dict(name='div', attrs={'class':'top-standard-brand-obal'}),
|
||||
dict(name='div', attrs={'class':'breadcrumb'}),
|
||||
dict(name='div', attrs={'id':'start-standard'}),
|
||||
dict(name='div', attrs={'id':'forum'}),
|
||||
dict(name='span', attrs={'class':'akce'}),
|
||||
dict(name='span', attrs={'class':'odrazka vetsi'}),
|
||||
dict(name='div', attrs={'class':'boxP'}),
|
||||
dict(name='div', attrs={'class':'box2'})]
|
||||
remove_tags = [dict(name='div', attrs={'id': ['social-bookmark']}),
|
||||
dict(name='div', attrs={'class': ['box1', 'svazane-tagy']}),
|
||||
dict(name='div', attrs={'class': 'itemcomment id0'}),
|
||||
dict(name='div', attrs={'class': 'hlavicka'}),
|
||||
dict(name='div', attrs={'class': 'hlavni-menu'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'top-standard-brand-obal'}),
|
||||
dict(name='div', attrs={'class': 'breadcrumb'}),
|
||||
dict(name='div', attrs={'id': 'start-standard'}),
|
||||
dict(name='div', attrs={'id': 'forum'}),
|
||||
dict(name='span', attrs={'class': 'akce'}),
|
||||
dict(name='span', attrs={'class': 'odrazka vetsi'}),
|
||||
dict(name='div', attrs={'class': 'boxP'}),
|
||||
dict(name='div', attrs={'class': 'box2'})]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*',
|
||||
re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
|
||||
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>'),
|
||||
(re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||
|
||||
keep_only_tags = []
|
||||
|
||||
visited_urls = {}
|
||||
|
||||
def get_article_url(self, article):
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
if url in self.visited_urls:
|
||||
|
@ -6,6 +6,7 @@ ahram.org.eg
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlAhram(BasicNewsRecipe):
|
||||
title = u'Al-Ahram (الأهرام)'
|
||||
__author__ = 'Hassan Williamson'
|
||||
@ -16,51 +17,60 @@ class AlAhram(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
publisher = 'Al-Ahram'
|
||||
category = 'News'
|
||||
publication_type = 'newsportal'
|
||||
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } '
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } ' # noqa
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['bbcolright']})
|
||||
dict(name='div', attrs={'class': ['bbcolright']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['bbnav', 'bbsp']}),
|
||||
dict(name='div', attrs={'id':['AddThisButton']}),
|
||||
dict(name='a', attrs={'class':['twitter-share-button']}),
|
||||
dict(name='div', attrs={'id':['ReaderCount']}),
|
||||
dict(name='div', attrs={'class': ['bbnav', 'bbsp']}),
|
||||
dict(name='div', attrs={'id': ['AddThisButton']}),
|
||||
dict(name='a', attrs={'class': ['twitter-share-button']}),
|
||||
dict(name='div', attrs={'id': ['ReaderCount']}),
|
||||
]
|
||||
|
||||
remove_attributes = [
|
||||
'width','height','style'
|
||||
'width', 'height', 'style'
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'الأولى', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'),
|
||||
(u'الصفحة الثانية', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
|
||||
(u'الصفحة الثانية',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'),
|
||||
(u'مصر', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'),
|
||||
(u'المشهد السياسي', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
|
||||
(u'المشهد السياسي',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'),
|
||||
(u'المحافظات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'),
|
||||
(u'الوطن العربي', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
|
||||
(u'الوطن العربي',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'),
|
||||
(u'العالم', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'),
|
||||
(u'تقارير المراسلين', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
|
||||
(u'تقارير المراسلين',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'),
|
||||
(u'تحقيقات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'),
|
||||
(u'قضايا واراء', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
|
||||
(u'قضايا واراء',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'),
|
||||
(u'اقتصاد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'),
|
||||
(u'رياضة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'),
|
||||
(u'حوادث', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'),
|
||||
(u'دنيا الثقافة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
|
||||
(u'المراة والطفل', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
|
||||
(u'دنيا الثقافة',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'),
|
||||
(u'المراة والطفل',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'),
|
||||
(u'يوم جديد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'),
|
||||
(u'الكتاب', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'),
|
||||
(u'الاعمدة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'),
|
||||
(u'أراء حرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'),
|
||||
(u'ملفات الاهرام', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
|
||||
(u'بريد الاهرام', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
|
||||
(u'برلمان الثورة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
|
||||
(u'ملفات الاهرام',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'),
|
||||
(u'بريد الاهرام',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'),
|
||||
(u'برلمان الثورة',
|
||||
'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'),
|
||||
(u'الاخيرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'),
|
||||
]
|
||||
|
@ -6,8 +6,10 @@ english.aljazeera.net
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def has_cls(x):
|
||||
return dict(attrs={'class':lambda cls: cls and x in cls.split()})
|
||||
return dict(attrs={'class': lambda cls: cls and x in cls.split()})
|
||||
|
||||
|
||||
class AlJazeera(BasicNewsRecipe):
|
||||
title = 'Al Jazeera in English'
|
||||
@ -27,36 +29,39 @@ class AlJazeera(BasicNewsRecipe):
|
||||
#dvArticleDate{font-size: small; color: #999999}
|
||||
"""
|
||||
conversion_options = {
|
||||
'comment' : description , 'tags' : category ,
|
||||
'publisher' : publisher , 'language' : language
|
||||
'comment': description, 'tags': category,
|
||||
'publisher': publisher, 'language': language
|
||||
}
|
||||
keep_only_tags = [
|
||||
dict(id='main-story'),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
has_cls('MoreOnTheStory'), has_cls('ArticleBottomToolbar'), dict(smtitle="ShowMore"),
|
||||
dict(name=['object','link','table','meta','base','iframe','embed']),
|
||||
has_cls('MoreOnTheStory'), has_cls(
|
||||
'ArticleBottomToolbar'), dict(smtitle="ShowMore"),
|
||||
dict(name=['object', 'link', 'table',
|
||||
'meta', 'base', 'iframe', 'embed']),
|
||||
]
|
||||
|
||||
feeds = [(u'Al Jazeera English', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989')]
|
||||
feeds = [(u'Al Jazeera English',
|
||||
u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989')]
|
||||
|
||||
def get_article_url(self, article):
|
||||
artlurl = article.get('link', None)
|
||||
return artlurl.replace('http://english.aljazeera.net//','http://english.aljazeera.net/')
|
||||
return artlurl.replace('http://english.aljazeera.net//', 'http://english.aljazeera.net/')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(face=True):
|
||||
del item['face']
|
||||
td = soup.find('td',attrs={'class':'DetailedSummary'})
|
||||
td = soup.find('td', attrs={'class': 'DetailedSummary'})
|
||||
if td:
|
||||
td.name = 'div'
|
||||
spn = soup.find('span',attrs={'id':'DetailedTitle'})
|
||||
spn = soup.find('span', attrs={'id': 'DetailedTitle'})
|
||||
if spn:
|
||||
spn.name='h1'
|
||||
for itm in soup.findAll('span', attrs={'id':['dvArticleDate','ctl00_cphBody_lblDate']}):
|
||||
spn.name = 'h1'
|
||||
for itm in soup.findAll('span', attrs={'id': ['dvArticleDate', 'ctl00_cphBody_lblDate']}):
|
||||
itm.name = 'div'
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
|
@ -6,6 +6,7 @@ almasryalyoum.com
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlMasryAlyoum(BasicNewsRecipe):
|
||||
title = u'Al-Masry Alyoum (المصري اليوم)'
|
||||
__author__ = 'Hassan Williamson'
|
||||
@ -16,38 +17,38 @@ class AlMasryAlyoum(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
publisher = 'Al-Masry Alyoum'
|
||||
category = 'News'
|
||||
publication_type = 'newsportal'
|
||||
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } '
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .tit_2{ font-weight: bold; font-size: 2em; } .pinfo{ font-size: 1.3em; } .articleimg img{ max-width: 100%; } .imgauther{ display: block; font-size: 0.7em; } .caption{ font-size: 0.7em; } ' # noqa
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['article']})
|
||||
dict(name='div', attrs={'class': ['article']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['share_buttons_container']}),
|
||||
dict(name='div', attrs={'class':['min_related']}),
|
||||
dict(name='div', attrs={'id':['feedback']}),
|
||||
dict(name='div', attrs={'class':['news_SMSBox']}),
|
||||
dict(name='div', attrs={'class':['tags']}),
|
||||
dict(name='div', attrs={'class':['ads', 'y_logo_news']}),
|
||||
dict(name='div', attrs={'class':['ads']}),
|
||||
dict(name='div', attrs={'class':['option']}),
|
||||
dict(name='div', attrs={'class':['seealso']}),
|
||||
dict(name='div', attrs={'id':['comments']}),
|
||||
dict(name='div', attrs={'class': ['share_buttons_container']}),
|
||||
dict(name='div', attrs={'class': ['min_related']}),
|
||||
dict(name='div', attrs={'id': ['feedback']}),
|
||||
dict(name='div', attrs={'class': ['news_SMSBox']}),
|
||||
dict(name='div', attrs={'class': ['tags']}),
|
||||
dict(name='div', attrs={'class': ['ads', 'y_logo_news']}),
|
||||
dict(name='div', attrs={'class': ['ads']}),
|
||||
dict(name='div', attrs={'class': ['option']}),
|
||||
dict(name='div', attrs={'class': ['seealso']}),
|
||||
dict(name='div', attrs={'id': ['comments']}),
|
||||
]
|
||||
|
||||
remove_attributes = [
|
||||
'width','height','style'
|
||||
'width', 'height', 'style'
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'أخر الأخبار', 'http://www.almasryalyoum.com/rss/RssFeeds'),
|
||||
(u'الصفحة الرئيسية', 'http://www.almasryalyoum.com/rss/RssFeeds?homePage=true'),
|
||||
(u'الصفحة الرئيسية',
|
||||
'http://www.almasryalyoum.com/rss/RssFeeds?homePage=true'),
|
||||
(u'أقلام وآراء', 'http://www.almasryalyoum.com/rss/RssFeeds?typeId=2&homePage=false'),
|
||||
(u'أخبار مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=3'),
|
||||
(u'رياضة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=8'),
|
||||
@ -56,23 +57,28 @@ class AlMasryAlyoum(BasicNewsRecipe):
|
||||
(u'فنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=10'),
|
||||
(u'منوعاتنون', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=12'),
|
||||
(u'ثقافة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=6'),
|
||||
(u'علوم وتكنولوجيا', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=9'),
|
||||
(u'تحقيقات وحوارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=5'),
|
||||
(u'علوم وتكنولوجيا',
|
||||
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=9'),
|
||||
(u'تحقيقات وحوارات',
|
||||
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=5'),
|
||||
(u'المرأة', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=69'),
|
||||
(u'رأي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=2'),
|
||||
(u'وسط الناس', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=13'),
|
||||
(u'مركز المصري للدراسات و المعلومات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=56'),
|
||||
(u'مركز المصري للدراسات و المعلومات',
|
||||
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=56'),
|
||||
(u'مطبخ', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=81'),
|
||||
(u'برلمان مصر', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=78'),
|
||||
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=54'),
|
||||
(u'تحليلات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=60'),
|
||||
(u'عروض نقدية', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=61'),
|
||||
(u'دراسات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=62'),
|
||||
(u'كتاب المصري اليوم', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=65'),
|
||||
(u'كتاب المصري اليوم',
|
||||
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=65'),
|
||||
(u'فعاليات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=66'),
|
||||
(u'إسلامي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=75'),
|
||||
(u'مطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=76'),
|
||||
(u'مسلسلاتيطبخي', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=77'),
|
||||
(u'مسلسلاتيطبخي',
|
||||
'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=77'),
|
||||
(u'رمضان زمان', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=82'),
|
||||
(u'تقارير', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=85'),
|
||||
(u'سيارات', 'http://www.almasryalyoum.com/rss/RssFeeds?sectionId=86'),
|
||||
|
@ -5,10 +5,14 @@ __copyright__ = '2014, spswerling'
|
||||
'''
|
||||
http://www.al-monitor.com/
|
||||
'''
|
||||
import string, inspect, datetime, re
|
||||
import string
|
||||
import inspect
|
||||
import datetime
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
|
||||
class AlMonitor(BasicNewsRecipe):
|
||||
title = u'Al Monitor'
|
||||
__author__ = u'spswerling'
|
||||
@ -26,29 +30,29 @@ class AlMonitor(BasicNewsRecipe):
|
||||
recursions = 0
|
||||
compress_news_images = True
|
||||
compress_news_images_max_size = 7
|
||||
scale_news_images = (150,200) # (kindle touch: 600x800)
|
||||
scale_news_images = (150, 200) # (kindle touch: 600x800)
|
||||
useHighResImages = False
|
||||
oldest_article = 1.5
|
||||
max_articles_per_section = 15
|
||||
|
||||
sections = [
|
||||
(u'egypt',u'http://www.al-monitor.com/pulse/egypt-pulse'),
|
||||
(u'gulf',u'http://www.al-monitor.com/pulse/gulf-pulse'),
|
||||
(u'iran',u'http://www.al-monitor.com/pulse/iran-pulse'),
|
||||
(u'iraq',u'http://www.al-monitor.com/pulse/iraq-pulse'),
|
||||
(u'israel',u'http://www.al-monitor.com/pulse/israel-pulse'),
|
||||
(u'lebanon',u'http://www.al-monitor.com/pulse/lebanon-pulse'),
|
||||
(u'palistine',u'http://www.al-monitor.com/pulse/palistine-pulse'),
|
||||
(u'syria',u'http://www.al-monitor.com/pulse/syria-pulse'),
|
||||
(u'turkey',u'http://www.al-monitor.com/pulse/turkey-pulse'),
|
||||
(u'egypt', u'http://www.al-monitor.com/pulse/egypt-pulse'),
|
||||
(u'gulf', u'http://www.al-monitor.com/pulse/gulf-pulse'),
|
||||
(u'iran', u'http://www.al-monitor.com/pulse/iran-pulse'),
|
||||
(u'iraq', u'http://www.al-monitor.com/pulse/iraq-pulse'),
|
||||
(u'israel', u'http://www.al-monitor.com/pulse/israel-pulse'),
|
||||
(u'lebanon', u'http://www.al-monitor.com/pulse/lebanon-pulse'),
|
||||
(u'palistine', u'http://www.al-monitor.com/pulse/palistine-pulse'),
|
||||
(u'syria', u'http://www.al-monitor.com/pulse/syria-pulse'),
|
||||
(u'turkey', u'http://www.al-monitor.com/pulse/turkey-pulse'),
|
||||
]
|
||||
|
||||
# util for creating remove_tags and keep_tags style regex matchers
|
||||
def tag_matcher(elt, attr, rgx_str):
|
||||
return dict(name=elt, attrs={attr:re.compile(rgx_str, re.IGNORECASE)})
|
||||
return dict(name=elt, attrs={attr: re.compile(rgx_str, re.IGNORECASE)})
|
||||
|
||||
remove_tags = [
|
||||
dict(attrs={'id':[
|
||||
dict(attrs={'id': [
|
||||
'header',
|
||||
'pulsebanner',
|
||||
'relatedarticles',
|
||||
@ -136,7 +140,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
return super(self.__class__, self).preprocess_raw_html(raw_html, url)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
summary_node = soup.find('div', {'id':'summary'})
|
||||
summary_node = soup.find('div', {'id': 'summary'})
|
||||
if summary_node:
|
||||
summary = self.text(summary_node)
|
||||
self._p('Summary: ' + summary)
|
||||
@ -167,7 +171,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
def date_from_string(self, datestring):
|
||||
try:
|
||||
# eg: Posted September 17, 2014
|
||||
dt = datetime.datetime.strptime(datestring,"Posted %B %d, %Y")
|
||||
dt = datetime.datetime.strptime(datestring, "Posted %B %d, %Y")
|
||||
except:
|
||||
dt = None
|
||||
|
||||
@ -192,7 +196,7 @@ class AlMonitor(BasicNewsRecipe):
|
||||
|
||||
return abs_url
|
||||
|
||||
def text(self,n):
|
||||
def text(self, n):
|
||||
return self.tag_to_string(n).strip()
|
||||
|
||||
def _dbg_soup_node(self, node):
|
||||
|
@ -3,6 +3,7 @@ __copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
title = u'Albert Mohler\'s Blog'
|
||||
__author__ = 'Peter Grungi'
|
||||
@ -15,4 +16,5 @@ class AlbertMohlersBlog(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
author = 'Albert Mohler'
|
||||
|
||||
feeds = [(u'Albert Mohler\'s Blog', u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
||||
feeds = [(u'Albert Mohler\'s Blog',
|
||||
u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
||||
|
@ -2,16 +2,16 @@ __license__ = 'GPL v3'
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AlejaKomiksu(BasicNewsRecipe):
|
||||
title = u'Aleja Komiksu'
|
||||
__author__ = 'fenuks'
|
||||
description = u'Serwis poświęcony komiksom. Najnowsze wieści, recenzje, artykuły, wywiady, galerie, komiksy online, konkursy, linki, baza komiksów online.'
|
||||
category = 'comics'
|
||||
#publication_type = ''
|
||||
language = 'pl'
|
||||
#encoding = ''
|
||||
extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}'
|
||||
preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
|
||||
preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>',
|
||||
re.DOTALL | re.IGNORECASE), lambda match: '</body>')]
|
||||
cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
|
||||
masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
|
||||
use_embedded_content = False
|
||||
@ -23,14 +23,12 @@ class AlejaKomiksu(BasicNewsRecipe):
|
||||
remove_attributes = ['style', 'font']
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'cont_tresc'})]
|
||||
#remove_tags = [dict()]
|
||||
#remove_tags_before = dict()
|
||||
keep_only_tags = [dict(attrs={'class': 'cont_tresc'})]
|
||||
|
||||
feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')]
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
tag = soup.find(attrs={'class':'rodzaj'})
|
||||
tag = soup.find(attrs={'class': 'rodzaj'})
|
||||
if tag and tag.a.string.lower().strip() == 'recenzje':
|
||||
link = soup.find(text=re.compile('recenzuje'))
|
||||
if link:
|
||||
|
@ -8,6 +8,7 @@ www.alo.rs
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Alo_Novine(BasicNewsRecipe):
|
||||
title = 'Alo!'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -30,24 +31,22 @@ class Alo_Novine(BasicNewsRecipe):
|
||||
img{margin-bottom: 0.8em} """
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_tags = [dict(name=['object','link','embed'])]
|
||||
remove_attributes = ['height','width']
|
||||
remove_tags = [dict(name=['object', 'link', 'embed'])]
|
||||
remove_attributes = ['height', 'width']
|
||||
|
||||
feeds = [
|
||||
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
|
||||
,(u'Politika' , u'http://www.alo.rs/rss/politika')
|
||||
,(u'Vesti' , u'http://www.alo.rs/rss/vesti')
|
||||
,(u'Sport' , u'http://www.alo.rs/rss/sport')
|
||||
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi')
|
||||
,(u'Saveti' , u'http://www.alo.rs/rss/saveti')
|
||||
|
||||
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti'),
|
||||
(u'Politika', u'http://www.alo.rs/rss/politika'),
|
||||
(u'Vesti', u'http://www.alo.rs/rss/vesti'),
|
||||
(u'Sport', u'http://www.alo.rs/rss/sport'),
|
||||
(u'Ljudi', u'http://www.alo.rs/rss/ljudi'),
|
||||
(u'Saveti', u'http://www.alo.rs/rss/saveti')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -61,5 +60,4 @@ class Alo_Novine(BasicNewsRecipe):
|
||||
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
|
||||
|
||||
def image_url_processor(self, baseurl, url):
|
||||
return url.replace('alo.rs//','alo.rs/')
|
||||
|
||||
return url.replace('alo.rs//', 'alo.rs/')
|
||||
|
@ -6,6 +6,7 @@ aoh.dk
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class aoh_dk(BasicNewsRecipe):
|
||||
title = 'Alt om Herning'
|
||||
__author__ = 'Rasmus Lauritsen'
|
||||
@ -25,19 +26,15 @@ class aoh_dk(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
feeds = [(u'All news', u'http://aoh.dk/rss.xml')]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1')
|
||||
,dict(name='span', attrs={'class':['frontpage_body']})
|
||||
dict(name='h1'), dict(name='span', attrs={'class': ['frontpage_body']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link'])
|
||||
dict(name=['object', 'link'])
|
||||
]
|
||||
|
@ -1,9 +1,10 @@
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Alternet(BasicNewsRecipe):
|
||||
title = u'Alternet'
|
||||
__author__= 'rty'
|
||||
__author__ = 'rty'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
publisher = 'alternet.org'
|
||||
@ -13,7 +14,7 @@ class Alternet(BasicNewsRecipe):
|
||||
(u'Front Page', u'http://feeds.feedblitz.com/alternet')
|
||||
]
|
||||
|
||||
remove_attributes = ['width', 'align','cellspacing']
|
||||
remove_attributes = ['width', 'align', 'cellspacing']
|
||||
remove_javascript = True
|
||||
use_embedded_content = True
|
||||
no_stylesheets = True
|
||||
@ -28,7 +29,7 @@ class Alternet(BasicNewsRecipe):
|
||||
def get_obfuscated_article(self, url):
|
||||
br = self.get_browser()
|
||||
br.open(url)
|
||||
response = br.follow_link(url_regex = r'/printversion/[0-9]+', nr = 0)
|
||||
response = br.follow_link(url_regex=r'/printversion/[0-9]+', nr=0)
|
||||
html = response.read()
|
||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||
self.temp_files[-1].write(html)
|
||||
|
@ -12,7 +12,9 @@ Change Log:
|
||||
|
||||
from calibre import (__appname__, force_unicode, strftime)
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re
|
||||
import os
|
||||
import datetime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
@ -21,6 +23,7 @@ from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
|
||||
class AppleDaily(BasicNewsRecipe):
|
||||
title = u'AM730'
|
||||
__author__ = 'Eddie Lau'
|
||||
@ -37,44 +40,44 @@ class AppleDaily(BasicNewsRecipe):
|
||||
description = 'http://www.am730.com.hk'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}'
|
||||
keep_only_tags = [dict(name='h2', attrs={'class':'printTopic'}),
|
||||
dict(name='div', attrs={'id':'article_content'}),
|
||||
dict(name='div', attrs={'id':'slider'})]
|
||||
remove_tags = [dict(name='img', attrs={'src':'images/am730_article_logo.jpg'}),
|
||||
dict(name='img', attrs={'src':'images/am_endmark.gif'})]
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
|
||||
keep_only_tags = [dict(name='h2', attrs={'class': 'printTopic'}),
|
||||
dict(name='div', attrs={'id': 'article_content'}),
|
||||
dict(name='div', attrs={'id': 'slider'})]
|
||||
remove_tags = [dict(name='img', attrs={'src': 'images/am730_article_logo.jpg'}),
|
||||
dict(name='img', attrs={'src': 'images/am_endmark.gif'})]
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at HKT 6am, all news are available
|
||||
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
|
||||
return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24)
|
||||
|
||||
def get_fetchdate(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||
if __Date__ != '':
|
||||
return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__[0:4]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__[4:6]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
@ -85,7 +88,9 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.am730.com.hk')
|
||||
cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False)
|
||||
cover = 'http://www.am730.com.hk/' + \
|
||||
soup.find(attrs={'id': 'mini_news_img'}).find(
|
||||
'img').get('src', False)
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
@ -97,7 +102,7 @@ class AppleDaily(BasicNewsRecipe):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
picdiv = soup.find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,picdiv['src'])
|
||||
self.add_toc_thumbnail(article, picdiv['src'])
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
@ -123,7 +128,8 @@ class AppleDaily(BasicNewsRecipe):
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
if self.publication_type:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
mi.publication_type = 'periodical:' + \
|
||||
self.publication_type + ':' + self.short_title()
|
||||
mi.timestamp = nowf()
|
||||
article_titles, aseen = [], set()
|
||||
for f in feeds:
|
||||
@ -142,9 +148,9 @@ class AppleDaily(BasicNewsRecipe):
|
||||
if language is not None:
|
||||
mi.language = language
|
||||
# This one affects the pub date shown in kindle title
|
||||
#mi.pubdate = nowf()
|
||||
# now appears to need the time field to be > 12.00noon as well
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(
|
||||
self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
|
||||
@ -153,12 +159,14 @@ class AppleDaily(BasicNewsRecipe):
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref = Guide.Reference(os.path.basename(
|
||||
self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest = [os.path.join(dir, 'feed_%d' % i)
|
||||
for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
@ -189,12 +197,11 @@ class AppleDaily(BasicNewsRecipe):
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
adir = 'feed_%d/article_%d/' % (num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
@ -204,16 +211,18 @@ class AppleDaily(BasicNewsRecipe):
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
entries.append('%sindex.html' % adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else _('Untitled Article'),
|
||||
parent.add_item('%sindex.html' % adir, None,
|
||||
a.title if a.title else _(
|
||||
'Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
last = os.path.join(
|
||||
self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
@ -226,12 +235,14 @@ class AppleDaily(BasicNewsRecipe):
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
prefix = '/'.join('..'for i in range(2 *
|
||||
len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
elem = BeautifulSoup(templ.render(
|
||||
doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
@ -240,7 +251,7 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
entries.append('feed_%d/index.html' % i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
@ -251,11 +262,11 @@ class AppleDaily(BasicNewsRecipe):
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
feed_index(i, toc.add_item('feed_%d/index.html' % i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
entries.append('feed_%d/index.html' % 0)
|
||||
feed_index(0, toc)
|
||||
|
||||
for i, p in enumerate(entries):
|
||||
@ -265,5 +276,3 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
@ -6,6 +6,7 @@ ambito.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Ambito(BasicNewsRecipe):
|
||||
title = 'Ambito.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -26,26 +27,25 @@ class Ambito(BasicNewsRecipe):
|
||||
.t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698}
|
||||
"""
|
||||
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'id':['tituloDespliegue','imgDesp','textoDespliegue']})]
|
||||
remove_tags = [dict(name=['object','link','embed','iframe','meta','link'])]
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id': ['tituloDespliegue', 'imgDesp', 'textoDespliegue']})]
|
||||
remove_tags = [
|
||||
dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link'])]
|
||||
|
||||
feeds = [
|
||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
||||
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
||||
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
|
||||
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
|
||||
,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
|
||||
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
|
||||
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
|
||||
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
|
||||
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' )
|
||||
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
|
||||
|
||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp'),
|
||||
(u'Economia', u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa'),
|
||||
(u'Politica', u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica'),
|
||||
(u'Informacion General', u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General'),
|
||||
(u'Campo', u'http://www.ambito.com/rss/noticias.asp?S=Agro'),
|
||||
(u'Internacionales', u'http://www.ambito.com/rss/noticias.asp?S=Internacionales'),
|
||||
(u'Deportes', u'http://www.ambito.com/rss/noticias.asp?S=Deportes'),
|
||||
(u'Espectaculos', u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos'),
|
||||
(u'Tecnologia', u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa'),
|
||||
(u'Ambito Nacional', u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional')
|
||||
]
|
||||
|
@ -8,6 +8,7 @@ import time
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Ambito_Financiero(BasicNewsRecipe):
|
||||
title = 'Ambito Financiero'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -31,14 +32,12 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
||||
remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])]
|
||||
keep_only_tags = [dict(name='div', attrs={'align': 'justify'})]
|
||||
remove_tags = [dict(name=['object', 'link', 'embed',
|
||||
'iframe', 'meta', 'link', 'table', 'img'])]
|
||||
remove_attributes = ['align']
|
||||
|
||||
def get_browser(self):
|
||||
@ -53,7 +52,7 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('/diario/noticia.asp?','/noticias/imprimir.asp?')
|
||||
return url.replace('/diario/noticia.asp?', '/noticias/imprimir.asp?')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
@ -67,21 +66,18 @@ class Ambito_Financiero(BasicNewsRecipe):
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
cover_item = soup.find('img',attrs={'class':'fotodespliegue'})
|
||||
cover_item = soup.find('img', attrs={'class': 'fotodespliegue'})
|
||||
if cover_item:
|
||||
self.cover_url = self.PREFIX + cover_item['src']
|
||||
articles = []
|
||||
checker = []
|
||||
for feed_link in soup.findAll('a', attrs={'class':['t0_portada','t2_portada','bajada']}):
|
||||
for feed_link in soup.findAll('a', attrs={'class': ['t0_portada', 't2_portada', 'bajada']}):
|
||||
url = self.PREFIX + feed_link['href']
|
||||
title = self.tag_to_string(feed_link)
|
||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
||||
if url not in checker:
|
||||
checker.append(url)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':u''
|
||||
'title': title, 'date': date, 'url': url, 'description': u''
|
||||
})
|
||||
return [(self.title, articles)]
|
||||
|
@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
from lxml import etree
|
||||
|
||||
|
||||
class AmericanThinker(BasicNewsRecipe):
|
||||
title = u'American Thinker'
|
||||
description = "American Thinker is a daily internet publication devoted to the thoughtful exploration of issues of importance to Americans."
|
||||
@ -24,18 +25,14 @@ class AmericanThinker(BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||
}
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
root = html5lib.parse(
|
||||
clean_xml_chars(raw), treebuilder='lxml',
|
||||
namespaceHTMLElements=False)
|
||||
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''):
|
||||
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
|
||||
x.getparent().remove(x)
|
||||
return etree.tostring(root, encoding=unicode)
|
||||
|
||||
|
@ -7,6 +7,7 @@ spectator.org
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from css_selectors import Select
|
||||
|
||||
|
||||
class TheAmericanSpectator(BasicNewsRecipe):
|
||||
title = 'The American Spectator'
|
||||
__author__ = 'Kovid Goyal'
|
||||
@ -20,7 +21,8 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
|
||||
def parse_index(self):
|
||||
root = self.index_to_soup('http://spectator.org/issues/current', as_tree=True)
|
||||
root = self.index_to_soup(
|
||||
'http://spectator.org/issues/current', as_tree=True)
|
||||
select = Select(root)
|
||||
main = tuple(select('div#block-system-main'))[0]
|
||||
feeds = []
|
||||
@ -43,7 +45,8 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
||||
for x in select('div.views-field-field-short-summary', li):
|
||||
desc = self.tag_to_string(x)
|
||||
break
|
||||
articles.append({'title':title, 'url':url, 'description':desc})
|
||||
articles.append(
|
||||
{'title': title, 'url': url, 'description': desc})
|
||||
self.log('\t', title, 'at', url)
|
||||
feeds.append((section_title, articles))
|
||||
return feeds
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AnDrumaMor(BasicNewsRecipe):
|
||||
title = u'An Druma M\xf3r'
|
||||
__author__ = "David O'Callaghan"
|
||||
@ -8,5 +9,5 @@ class AnDrumaMor(BasicNewsRecipe):
|
||||
language = 'ga'
|
||||
use_embedded_content = True
|
||||
|
||||
feeds = [(u'Nuacht Laeth\xfail', u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr')]
|
||||
|
||||
feeds = [(u'Nuacht Laeth\xfail',
|
||||
u'http://feeds.feedburner.com/NuachtLneLaethilArAnDrumaMr')]
|
||||
|
@ -26,17 +26,17 @@ class anan(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.anandtech.com/content/images/globals/printheader.png'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='section', attrs={'class':['main_cont']}),
|
||||
dict(name='section', attrs={'class': ['main_cont']}),
|
||||
]
|
||||
remove_tags=[
|
||||
dict(name='div', attrs={'class':['print',
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class': ['print',
|
||||
'breadcrumb_area noprint',
|
||||
'fl-rt noprint',
|
||||
'blog_top_right',]})
|
||||
'blog_top_right', ]})
|
||||
]
|
||||
|
||||
feeds = [('Anandtech', 'http://www.anandtech.com/rss/')]
|
||||
|
||||
def print_version(self,url):
|
||||
def print_version(self, url):
|
||||
# return url.replace("0Cshow0C", "0Cprint0C") # 2013-09-07 AGE: update
|
||||
return url.replace("/show/", "/print/") # 2014-02-27 AGE: update
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
||||
title = u'Anchorage Daily News'
|
||||
__author__ = 'rty'
|
||||
@ -7,7 +8,6 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
feeds = [(u'Alaska News', u'http://www.adn.com/rss-feeds/feed/all'),
|
||||
(u'Politics', u'http://www.adn.com/rss-feeds/feed/politics'),
|
||||
]
|
||||
@ -23,16 +23,6 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
encoding = 'utf-8'
|
||||
conversion_options = {'linearize_tables':True}
|
||||
conversion_options = {'linearize_tables': True}
|
||||
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
|
||||
|
||||
#keep_only_tags = [
|
||||
#dict(name='div', attrs={'class':'left_col story_mainbar'}),
|
||||
#]
|
||||
#remove_tags = [
|
||||
#dict(name='div', attrs={'class':'story_tools'}),
|
||||
#dict(name='p', attrs={'class':'ad_label'}),
|
||||
#]
|
||||
#remove_tags_after = [
|
||||
#dict(name='div', attrs={'class':'advertisement'}),
|
||||
#]
|
||||
|
@ -1,6 +1,7 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Android_com_pl(BasicNewsRecipe):
|
||||
title = u'Android.com.pl'
|
||||
__author__ = 'fenuks'
|
||||
@ -11,5 +12,6 @@ class Android_com_pl(BasicNewsRecipe):
|
||||
cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(ur'<p>.{,1}</p>', re.DOTALL), lambda match: '')]
|
||||
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
@ -3,6 +3,7 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
||||
title = u'Animal Pol\u00EDtico'
|
||||
publisher = u'Animal Pol\u00EDtico'
|
||||
@ -14,32 +15,31 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
language = 'es_MX'
|
||||
|
||||
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
|
||||
|
||||
remove_tags_before = dict(name='div', id='main')
|
||||
remove_tags = [dict(name='div', attrs={'class':'fb-like-button'})]
|
||||
keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}),
|
||||
dict(name='div', attrs={'class':'entry-content'})]
|
||||
remove_tags = [dict(name='div', attrs={'class': 'fb-like-button'})]
|
||||
keep_only_tags = [dict(name='h1', attrs={'class': 'entry-title'}),
|
||||
dict(name='div', attrs={'class': 'entry-content'})]
|
||||
remove_javascript = True
|
||||
INDEX = 'http://www.animalpolitico.com/'
|
||||
|
||||
def generic_parse(self, soup):
|
||||
articles = []
|
||||
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
|
||||
# soup.findAll('li', 'hentry'):
|
||||
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
|
||||
article_url = entry.a['href'] + '?print=yes'
|
||||
article_title= entry.find('h3', 'entry-title')
|
||||
article_title= self.tag_to_string(article_title)
|
||||
article_title = entry.find('h3', 'entry-title')
|
||||
article_title = self.tag_to_string(article_title)
|
||||
article_date = entry.find('span', 'the-time')
|
||||
article_date = self.tag_to_string(article_date)
|
||||
article_desc = self.tag_to_string(entry.find('p'))
|
||||
|
||||
#print 'Article:',article_title, article_date,article_url
|
||||
#print entry['class']
|
||||
# print 'Article:',article_title, article_date,article_url
|
||||
# print entry['class']
|
||||
|
||||
articles.append({'title' : article_title,
|
||||
'date' : article_date,
|
||||
'description' : article_desc,
|
||||
'url' : article_url})
|
||||
articles.append({'title': article_title,
|
||||
'date': article_date,
|
||||
'description': article_desc,
|
||||
'url': article_url})
|
||||
# Avoid including the multimedia stuff.
|
||||
if entry['class'].find('last') != -1:
|
||||
break
|
||||
@ -48,56 +48,57 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
||||
|
||||
def plumaje_parse(self, soup):
|
||||
articles = []
|
||||
blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1)
|
||||
blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) # noqa
|
||||
for entry in blogs_soup.findAll('li'):
|
||||
article_title = entry.p
|
||||
article_url = article_title.a['href'] + '?print=yes'
|
||||
article_date = article_title.nextSibling
|
||||
article_title = self.tag_to_string(article_title)
|
||||
article_date = self.tag_to_string(article_date).replace(u'Last Updated: ', '')
|
||||
article_date = self.tag_to_string(
|
||||
article_date).replace(u'Last Updated: ', '')
|
||||
article_desc = self.tag_to_string(entry.find('h4'))
|
||||
|
||||
#print 'Article:',article_title, article_date,article_url
|
||||
articles.append({'title' : article_title,
|
||||
'date' : article_date,
|
||||
'description' : article_desc,
|
||||
'url' : article_url})
|
||||
# print 'Article:',article_title, article_date,article_url
|
||||
articles.append({'title': article_title,
|
||||
'date': article_date,
|
||||
'description': article_desc,
|
||||
'url': article_url})
|
||||
|
||||
return articles
|
||||
|
||||
def boca_parse(self, soup):
|
||||
articles = []
|
||||
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): #soup.findAll('li', 'hentry'):
|
||||
article_title= entry.find('h2', 'entry-title')
|
||||
# soup.findAll('li', 'hentry'):
|
||||
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
|
||||
article_title = entry.find('h2', 'entry-title')
|
||||
article_url = article_title.a['href'] + '?print=yes'
|
||||
article_title= self.tag_to_string(article_title)
|
||||
article_title = self.tag_to_string(article_title)
|
||||
article_date = entry.find('span', 'entry-date')
|
||||
article_date = self.tag_to_string(article_date)
|
||||
article_desc = self.tag_to_string(entry.find('div', 'entry-content'))
|
||||
article_desc = self.tag_to_string(
|
||||
entry.find('div', 'entry-content'))
|
||||
|
||||
#print 'Article:',article_title, article_date,article_url
|
||||
#print entry['class']
|
||||
# print 'Article:',article_title, article_date,article_url
|
||||
# print entry['class']
|
||||
|
||||
articles.append({'title' : article_title,
|
||||
'date' : article_date,
|
||||
'description' : article_desc,
|
||||
'url' : article_url})
|
||||
articles.append({'title': article_title,
|
||||
'date': article_date,
|
||||
'description': article_desc,
|
||||
'url': article_url})
|
||||
# Avoid including the multimedia stuff.
|
||||
if entry['class'].find('last') != -1:
|
||||
break
|
||||
|
||||
return articles
|
||||
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
gobierno_soup = self.index_to_soup(self.INDEX+'gobierno/')
|
||||
congreso_soup = self.index_to_soup(self.INDEX+'congreso/')
|
||||
seguridad_soup = self.index_to_soup(self.INDEX+'seguridad/')
|
||||
comunidad_soup = self.index_to_soup(self.INDEX+'comunidad/')
|
||||
plumaje_soup = self.index_to_soup(self.INDEX+'plumaje/')
|
||||
la_boca_del_lobo_soup = self.index_to_soup(self.INDEX+'category/la-boca-del-lobo/')
|
||||
gobierno_soup = self.index_to_soup(self.INDEX + 'gobierno/')
|
||||
congreso_soup = self.index_to_soup(self.INDEX + 'congreso/')
|
||||
seguridad_soup = self.index_to_soup(self.INDEX + 'seguridad/')
|
||||
comunidad_soup = self.index_to_soup(self.INDEX + 'comunidad/')
|
||||
plumaje_soup = self.index_to_soup(self.INDEX + 'plumaje/')
|
||||
la_boca_del_lobo_soup = self.index_to_soup(
|
||||
self.INDEX + 'category/la-boca-del-lobo/')
|
||||
|
||||
gobierno_articles = self.generic_parse(gobierno_soup)
|
||||
congreso_articles = self.generic_parse(congreso_soup)
|
||||
@ -106,6 +107,5 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
|
||||
plumaje_articles = self.plumaje_parse(plumaje_soup)
|
||||
la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)
|
||||
|
||||
|
||||
return [ (u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
|
||||
return [(u'Gobierno', gobierno_articles), (u'Congreso', congreso_articles), (u'Seguridad', seguridad_articles),
|
||||
(u'Comunidad', comunidad_articles), (u'Plumaje', plumaje_articles), (u'La Boca del Lobo', la_boca_del_lobo_articles), ]
|
||||
|
@ -1,6 +1,7 @@
|
||||
#-*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AntywebRecipe(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
__license__ = 'GPL v3'
|
||||
@ -10,27 +11,28 @@ class AntywebRecipe(BasicNewsRecipe):
|
||||
title = u'Antyweb'
|
||||
category = u'News'
|
||||
description = u'Blog o internecie i nowych technologiach'
|
||||
cover_url=''
|
||||
remove_empty_feeds= True
|
||||
cover_url = ''
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = False
|
||||
no_stylesheets=True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript = True
|
||||
simultaneous_downloads = 10
|
||||
ignore_duplicate_articles = {'title', 'url'} # zignoruj zduplikowane artykuły o takich samych tytułach LUB adresach
|
||||
scale_news_images =True
|
||||
conversion_options = { 'tags' : u'news, aplikacje mobilne, Android, iOS, Windows Phone ',
|
||||
'smarten_punctuation' : True,
|
||||
'publisher' : 'AntyWeb'
|
||||
# zignoruj zduplikowane artykuły o takich samych tytułach LUB adresach
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
scale_news_images = True
|
||||
conversion_options = {'tags': u'news, aplikacje mobilne, Android, iOS, Windows Phone ',
|
||||
'smarten_punctuation': True,
|
||||
'publisher': 'AntyWeb'
|
||||
} # opcje konwersji.
|
||||
|
||||
keep_only_tags=[]
|
||||
keep_only_tags.append(dict(name = 'h1'))
|
||||
keep_only_tags.append(dict(name = 'article', attrs = {'class' : 'article'}))
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ac-footer group'}))
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name='h1'))
|
||||
keep_only_tags.append(dict(name='article', attrs={'class': 'article'}))
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'ac-footer group'}))
|
||||
|
||||
feeds = [
|
||||
(u'News', 'http://feeds.feedburner.com/antyweb'),
|
||||
@ -42,6 +44,7 @@ class AntywebRecipe(BasicNewsRecipe):
|
||||
(u'Google', 'http://feeds.feedburner.com/AntywebGoogle'),
|
||||
(u'Microsoft', 'http://feeds.feedburner.com/AntywebMicrosoft')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
|
@ -10,17 +10,19 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
conversion_options = {
|
||||
'linearize_tables' : True
|
||||
'linearize_tables': True
|
||||
}
|
||||
keep_only_tags = {'name':'table', 'attrs':{'class':lambda x: x and 'ap-story-table' in x.split()}}
|
||||
keep_only_tags = {'name': 'table', 'attrs': {
|
||||
'class': lambda x: x and 'ap-story-table' in x.split()}}
|
||||
remove_tags = [
|
||||
{'class':['ap-mediabox-table']},
|
||||
{'name':'img', 'src':lambda x: x and '//analytics.' in x},
|
||||
{'class': ['ap-mediabox-table']},
|
||||
{'name': 'img', 'src': lambda x: x and '//analytics.' in x},
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
fronts = ('HOME', 'US', 'WORLD', 'BUSINESS', 'TECHNOLOGY', 'SPORTS', 'ENTERTAINMENT', 'HEALTH', 'SCIENCE', 'POLITICS')
|
||||
fronts = ('HOME', 'US', 'WORLD', 'BUSINESS', 'TECHNOLOGY',
|
||||
'SPORTS', 'ENTERTAINMENT', 'HEALTH', 'SCIENCE', 'POLITICS')
|
||||
for front in fronts:
|
||||
feeds.append([front.capitalize(), self.parse_section(front)])
|
||||
feeds[0][0] = 'Top Stories'
|
||||
@ -28,19 +30,20 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
|
||||
def parse_section(self, front):
|
||||
self.log('Processing section:', front)
|
||||
soup = self.index_to_soup('http://hosted.ap.org/dynamic/fronts/%s?SITE=AP' % front)
|
||||
soup = self.index_to_soup(
|
||||
'http://hosted.ap.org/dynamic/fronts/%s?SITE=AP' % front)
|
||||
|
||||
articles = []
|
||||
for x in soup.findAll('p', attrs={'class':['ap-newsbriefitem-p', 'ap-topheadlineitem-p']}):
|
||||
for x in soup.findAll('p', attrs={'class': ['ap-newsbriefitem-p', 'ap-topheadlineitem-p']}):
|
||||
a = x.find('a', href=True)
|
||||
title = self.tag_to_string(a)
|
||||
url = "http://hosted.ap.org" + a['href']
|
||||
p = x.find(attrs={'class':'topheadlinebody'})
|
||||
p = x.find(attrs={'class': 'topheadlinebody'})
|
||||
desc = ''
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
self.log('\tFound article:', title, '\n\t\t', desc)
|
||||
articles.append({'title':title, 'url':url})
|
||||
articles.append({'title': title, 'url': url})
|
||||
|
||||
self.log('\n\n')
|
||||
|
||||
|
@ -10,6 +10,7 @@ http://www.apcom.NET/
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Apcom(BasicNewsRecipe):
|
||||
__author__ = 'Marini Gabriele'
|
||||
description = 'Italian daily newspaper'
|
||||
@ -28,15 +29,13 @@ class Apcom(BasicNewsRecipe):
|
||||
recursion = 100
|
||||
|
||||
no_stylesheets = True
|
||||
conversion_options = {'linearize_tables':True}
|
||||
conversion_options = {'linearize_tables': True}
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'ag_center'})
|
||||
dict(name='div', attrs={'id': 'ag_center'})
|
||||
]
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Globale', u'http://www.apcom.net/rss/globale.xml '),
|
||||
(u'Politica', u'http://www.apcom.net/rss/politica.xml'),
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class APOD(BasicNewsRecipe):
|
||||
title = u'Astronomy Picture of the Day'
|
||||
__author__ = 'Starson17'
|
||||
@ -23,6 +24,7 @@ class APOD(BasicNewsRecipe):
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
center_tags = soup.findAll(['center'])
|
||||
p_tags = soup.findAll(['p'])
|
||||
@ -35,4 +37,3 @@ class APOD(BasicNewsRecipe):
|
||||
for tag in last2_p:
|
||||
tag.extract()
|
||||
return soup
|
||||
|
||||
|
@ -9,18 +9,19 @@ appfunds.blogspot.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class app_funds(BasicNewsRecipe):
|
||||
title = u'APP Funds'
|
||||
__author__ = 'teepel <teepel44@gmail.com>'
|
||||
language = 'pl'
|
||||
description ='Blog inwestora dla inwestorów i oszczędzających'
|
||||
INDEX='http://appfunds.blogspot.com'
|
||||
remove_empty_feeds= True
|
||||
description = 'Blog inwestora dla inwestorów i oszczędzających'
|
||||
INDEX = 'http://appfunds.blogspot.com'
|
||||
remove_empty_feeds = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 5
|
||||
remove_javascript=True
|
||||
no_stylesheets=True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
|
||||
|
@ -6,7 +6,9 @@ __Date__ = ''
|
||||
|
||||
from calibre import (__appname__, force_unicode, strftime)
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re
|
||||
import os
|
||||
import datetime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
@ -15,6 +17,7 @@ from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
|
||||
class AppleDaily(BasicNewsRecipe):
|
||||
title = u'蘋果日報 (香港)'
|
||||
__author__ = 'Eddie Lau'
|
||||
@ -32,42 +35,42 @@ class AppleDaily(BasicNewsRecipe):
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
|
||||
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
|
||||
remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
|
||||
dict(name='p', attrs={'class':'next'})]
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'content-article'})]
|
||||
remove_tags = [dict(name='div', attrs={'class': 'prev-next-btn'}),
|
||||
dict(name='p', attrs={'class': 'next'})]
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at HKT 6am, all news are available
|
||||
return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
|
||||
return dt_utc + datetime.timedelta(8.0 / 24) - datetime.timedelta(6.0 / 24)
|
||||
|
||||
def get_fetchdate(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||
if __Date__ != '':
|
||||
return __Date__[0:4] + '-' + __Date__[4:6] + '-' + __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__[0:4]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__[4:6]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
if __Date__ <> '':
|
||||
if __Date__ != '':
|
||||
return __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
@ -78,7 +81,7 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||
cover = soup.find(attrs={'class':'top-news'}).get('src', False)
|
||||
cover = soup.find(attrs={'class': 'top-news'}).get('src', False)
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
try:
|
||||
br.open(cover)
|
||||
@ -90,12 +93,12 @@ class AppleDaily(BasicNewsRecipe):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
picdiv = soup.find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,picdiv['src'])
|
||||
self.add_toc_thumbnail(article, picdiv['src'])
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
soup = self.index_to_soup('http://hkm.appledaily.com/')
|
||||
ul = soup.find(attrs={'class':'menu'})
|
||||
ul = soup.find(attrs={'class': 'menu'})
|
||||
sectionList = []
|
||||
for li in ul.findAll('li'):
|
||||
relativea = li.find('a', href=True).get('href', False)
|
||||
@ -111,13 +114,14 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
def parse_section(self, url):
|
||||
soup = self.index_to_soup(url)
|
||||
ul = soup.find(attrs={'class':'list'})
|
||||
ul = soup.find(attrs={'class': 'list'})
|
||||
current_articles = []
|
||||
for li in ul.findAll('li'):
|
||||
a = li.find('a', href=True)
|
||||
title = li.find('p', text=True).strip()
|
||||
if a is not None:
|
||||
current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
|
||||
current_articles.append(
|
||||
{'title': title, 'url': 'http://hkm.appledaily.com/' + a.get('href', False)})
|
||||
pass
|
||||
return current_articles
|
||||
|
||||
@ -131,7 +135,8 @@ class AppleDaily(BasicNewsRecipe):
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
if self.publication_type:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
mi.publication_type = 'periodical:' + \
|
||||
self.publication_type + ':' + self.short_title()
|
||||
mi.timestamp = nowf()
|
||||
article_titles, aseen = [], set()
|
||||
for f in feeds:
|
||||
@ -150,9 +155,10 @@ class AppleDaily(BasicNewsRecipe):
|
||||
if language is not None:
|
||||
mi.language = language
|
||||
# This one affects the pub date shown in kindle title
|
||||
#mi.pubdate = nowf()
|
||||
# mi.pubdate = nowf()
|
||||
# now appears to need the time field to be > 12.00noon as well
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(
|
||||
self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
|
||||
@ -161,12 +167,14 @@ class AppleDaily(BasicNewsRecipe):
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref = Guide.Reference(os.path.basename(
|
||||
self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest = [os.path.join(dir, 'feed_%d' % i)
|
||||
for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
@ -197,12 +205,11 @@ class AppleDaily(BasicNewsRecipe):
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
adir = 'feed_%d/article_%d/' % (num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
@ -212,16 +219,18 @@ class AppleDaily(BasicNewsRecipe):
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
entries.append('%sindex.html' % adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else _('Untitled Article'),
|
||||
parent.add_item('%sindex.html' % adir, None,
|
||||
a.title if a.title else _(
|
||||
'Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
last = os.path.join(
|
||||
self.output_dir, ('%sindex.html' % adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
@ -234,12 +243,14 @@ class AppleDaily(BasicNewsRecipe):
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
prefix = '/'.join('..'for i in range(2 *
|
||||
len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
elem = BeautifulSoup(templ.render(
|
||||
doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
@ -248,7 +259,7 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
entries.append('feed_%d/index.html' % i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
@ -259,11 +270,11 @@ class AppleDaily(BasicNewsRecipe):
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
feed_index(i, toc.add_item('feed_%d/index.html' % i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
entries.append('feed_%d/index.html' % 0)
|
||||
feed_index(0, toc)
|
||||
|
||||
for i, p in enumerate(entries):
|
||||
@ -273,5 +284,3 @@ class AppleDaily(BasicNewsRecipe):
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
@ -34,12 +34,12 @@ class AppledailyTW(BasicNewsRecipe):
|
||||
{'name': 'hr'}
|
||||
]
|
||||
conversion_options = {
|
||||
'title' : title,
|
||||
'comments' : description,
|
||||
'tags' : category,
|
||||
'language' : language,
|
||||
'publisher' : publisher,
|
||||
'authors' : publisher,
|
||||
'title': title,
|
||||
'comments': description,
|
||||
'tags': category,
|
||||
'language': language,
|
||||
'publisher': publisher,
|
||||
'authors': publisher,
|
||||
'linearize_tables': True
|
||||
}
|
||||
feeds = [
|
||||
@ -105,5 +105,6 @@ class AppledailyTW(BasicNewsRecipe):
|
||||
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
raw_html = re.sub(ur'<a href=".*?<br><br>.*?<\/a>', '', raw_html)
|
||||
raw_html = re.sub(ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
|
||||
raw_html = re.sub(
|
||||
ur'<title>(.*?)[\s]+\|.*<\/title>', '<title>\1<\/title>', raw_html)
|
||||
return raw_html
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BasicUserRecipe1395137685(BasicNewsRecipe):
|
||||
title = u'Applefobia'
|
||||
__author__ = 'koliberek'
|
||||
@ -12,10 +13,10 @@ class BasicUserRecipe1395137685(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
conversion_options = {
|
||||
'tags' : u'newsy, Apple, humor',
|
||||
'smarten_punctuation' : True,
|
||||
'authors' : 'Ogrodnik January',
|
||||
'publisher' : 'Blogspot.pl'
|
||||
'tags': u'newsy, Apple, humor',
|
||||
'smarten_punctuation': True,
|
||||
'authors': 'Ogrodnik January',
|
||||
'publisher': 'Blogspot.pl'
|
||||
}
|
||||
reverse_article_order = True
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AmericanProspect(BasicNewsRecipe):
|
||||
title = u'American Prospect'
|
||||
__author__ = u'Michael Heinz, a.peter'
|
||||
@ -12,11 +13,8 @@ class AmericanProspect(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
#keep_only_tags = [dict(name='div', attrs={'class':'pad_10L10R'})]
|
||||
#remove_tags = [dict(name='form'), dict(name='div', attrs={'class':['bkt_caption','sharebox noprint','badgebox']})]
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
feeds = [(u'Articles', u'feed://www.prospect.org/articles_rss.jsp')]
|
||||
|
||||
|
@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1335656316(BasicNewsRecipe):
|
||||
title = u'AraInfo.org'
|
||||
__author__ = 'Ruben Pollan'
|
||||
@ -16,4 +17,9 @@ class AdvancedUserRecipe1335656316(BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
cover_url = u'http://arainfo.org/wordpress/wp-content/uploads/2011/10/logo-web_alta.jpg'
|
||||
|
||||
feeds = [(u'Movimientos', u'http://arainfo.org/category/movimientos/feed/'), (u'Econom\xeda', u'http://arainfo.org/category/economia/feed/'), (u'Ecolog\xeda', u'http://arainfo.org/category/ecologia/feed/'), (u'Culturas', u'http://arainfo.org/category/culturas/feed/'), (u'Altavoz', u'http://arainfo.org/category/altavoz/feed/')]
|
||||
feeds = [
|
||||
(u'Movimientos', u'http://arainfo.org/category/movimientos/feed/'),
|
||||
(u'Econom\xeda', u'http://arainfo.org/category/economia/feed/'),
|
||||
(u'Ecolog\xeda', u'http://arainfo.org/category/ecologia/feed/'),
|
||||
(u'Culturas', u'http://arainfo.org/category/culturas/feed/'),
|
||||
(u'Altavoz', u'http://arainfo.org/category/altavoz/feed/')]
|
||||
|
@ -6,12 +6,12 @@ www.arabianbusiness.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Arabian_Business(BasicNewsRecipe):
|
||||
title = 'Arabian Business'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.'
|
||||
description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.' # noqa
|
||||
publisher = 'Arabian Business Publishing Ltd.'
|
||||
category = 'ArabianBusiness.com,Arab Business News,Middle East Business News,Middle East Business,Arab Media News,Industry Events,Middle East Industry News,Arab Business Industry,Dubai Business News,Financial News,UAE Business News,Middle East Press Releases,Gulf News,Arab News,GCC Business News,Banking Finance,Media Marketing,Construction,Oil Gas,Retail,Transportation,Travel Hospitality,Photos,Videos,Life Style,Fashion,United Arab Emirates,UAE,Dubai,Sharjah,Abu Dhabi,Qatar,KSA,Saudi Arabia,Bahrain,Kuwait,Oman,Europe,South Asia,America,Asia,news'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
@ -29,48 +29,45 @@ class Arabian_Business(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
remove_tags_before=dict(attrs={'id':'article-title'})
|
||||
remove_tags_before = dict(attrs={'id': 'article-title'})
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','base','iframe','embed','object'])
|
||||
,dict(attrs={'class':'printfooter'})
|
||||
dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object']), dict(
|
||||
attrs={'class': 'printfooter'})
|
||||
]
|
||||
remove_attributes=['lang']
|
||||
|
||||
remove_attributes = ['lang']
|
||||
|
||||
feeds = [
|
||||
(u'Africa' , u'http://www.arabianbusiness.com/world/Africa/?service=rss' )
|
||||
,(u'Americas' , u'http://www.arabianbusiness.com/world/americas/?service=rss' )
|
||||
,(u'Asia Pacific' , u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss' )
|
||||
,(u'Europe' , u'http://www.arabianbusiness.com/world/europe/?service=rss' )
|
||||
,(u'Middle East' , u'http://www.arabianbusiness.com/world/middle-east/?service=rss' )
|
||||
,(u'South Asia' , u'http://www.arabianbusiness.com/world/south-asia/?service=rss' )
|
||||
,(u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss' )
|
||||
,(u'Construction' , u'http://www.arabianbusiness.com/industries/construction/?service=rss' )
|
||||
,(u'Education' , u'http://www.arabianbusiness.com/industries/education/?service=rss' )
|
||||
,(u'Energy' , u'http://www.arabianbusiness.com/industries/energy/?service=rss' )
|
||||
,(u'Healthcare' , u'http://www.arabianbusiness.com/industries/healthcare/?service=rss' )
|
||||
,(u'Media' , u'http://www.arabianbusiness.com/industries/media/?service=rss' )
|
||||
,(u'Real Estate' , u'http://www.arabianbusiness.com/industries/real-estate/?service=rss' )
|
||||
,(u'Retail' , u'http://www.arabianbusiness.com/industries/retail/?service=rss' )
|
||||
,(u'Technology' , u'http://www.arabianbusiness.com/industries/technology/?service=rss' )
|
||||
,(u'Transport' , u'http://www.arabianbusiness.com/industries/transport/?service=rss' )
|
||||
,(u'Travel' , u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss')
|
||||
,(u'Equities' , u'http://www.arabianbusiness.com/markets/equities/?service=rss' )
|
||||
,(u'Commodities' , u'http://www.arabianbusiness.com/markets/commodities/?service=rss' )
|
||||
,(u'Currencies' , u'http://www.arabianbusiness.com/markets/currencies/?service=rss' )
|
||||
,(u'Market Data' , u'http://www.arabianbusiness.com/markets/market-data/?service=rss' )
|
||||
,(u'Comment' , u'http://www.arabianbusiness.com/opinion/comment/?service=rss' )
|
||||
,(u'Think Tank' , u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss' )
|
||||
,(u'Arts' , u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss' )
|
||||
,(u'Cars' , u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss' )
|
||||
,(u'Food' , u'http://www.arabianbusiness.com/lifestyle/food/?service=rss' )
|
||||
,(u'Sport' , u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss' )
|
||||
|
||||
(u'Africa', u'http://www.arabianbusiness.com/world/Africa/?service=rss'),
|
||||
(u'Americas', u'http://www.arabianbusiness.com/world/americas/?service=rss'),
|
||||
(u'Asia Pacific', u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss'),
|
||||
(u'Europe', u'http://www.arabianbusiness.com/world/europe/?service=rss'),
|
||||
(u'Middle East', u'http://www.arabianbusiness.com/world/middle-east/?service=rss'),
|
||||
(u'South Asia', u'http://www.arabianbusiness.com/world/south-asia/?service=rss'),
|
||||
(u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss'),
|
||||
(u'Construction', u'http://www.arabianbusiness.com/industries/construction/?service=rss'),
|
||||
(u'Education', u'http://www.arabianbusiness.com/industries/education/?service=rss'),
|
||||
(u'Energy', u'http://www.arabianbusiness.com/industries/energy/?service=rss'),
|
||||
(u'Healthcare', u'http://www.arabianbusiness.com/industries/healthcare/?service=rss'),
|
||||
(u'Media', u'http://www.arabianbusiness.com/industries/media/?service=rss'),
|
||||
(u'Real Estate', u'http://www.arabianbusiness.com/industries/real-estate/?service=rss'),
|
||||
(u'Retail', u'http://www.arabianbusiness.com/industries/retail/?service=rss'),
|
||||
(u'Technology', u'http://www.arabianbusiness.com/industries/technology/?service=rss'),
|
||||
(u'Transport', u'http://www.arabianbusiness.com/industries/transport/?service=rss'),
|
||||
(u'Travel', u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss'),
|
||||
(u'Equities', u'http://www.arabianbusiness.com/markets/equities/?service=rss'),
|
||||
(u'Commodities', u'http://www.arabianbusiness.com/markets/commodities/?service=rss'),
|
||||
(u'Currencies', u'http://www.arabianbusiness.com/markets/currencies/?service=rss'),
|
||||
(u'Market Data', u'http://www.arabianbusiness.com/markets/market-data/?service=rss'),
|
||||
(u'Comment', u'http://www.arabianbusiness.com/opinion/comment/?service=rss'),
|
||||
(u'Think Tank', u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss'),
|
||||
(u'Arts', u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss'),
|
||||
(u'Cars', u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss'),
|
||||
(u'Food', u'http://www.arabianbusiness.com/lifestyle/food/?service=rss'),
|
||||
(u'Sport', u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Arbetaren_SE(BasicNewsRecipe):
|
||||
title = u'Arbetaren'
|
||||
__author__ = 'Joakim Lindskog'
|
||||
@ -15,19 +16,16 @@ class Arbetaren_SE(BasicNewsRecipe):
|
||||
language = 'sv'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
||||
remove_tags_before = dict(name='div', attrs={'id':'article'})
|
||||
remove_tags_after = dict(name='p',attrs={'id':'byline'})
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
|
||||
remove_tags_before = dict(name='div', attrs={'id': 'article'})
|
||||
remove_tags_after = dict(name='p', attrs={'id': 'byline'})
|
||||
remove_tags = [
|
||||
dict(name=['object','link','base']),
|
||||
dict(name='p', attrs={'class':'print'}),
|
||||
dict(name='a', attrs={'class':'addthis_button_compact'}),
|
||||
dict(name=['object', 'link', 'base']),
|
||||
dict(name='p', attrs={'class': 'print'}),
|
||||
dict(name='a', attrs={'class': 'addthis_button_compact'}),
|
||||
dict(name='script')
|
||||
]
|
||||
|
||||
|
@ -3,16 +3,17 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class Arcadia_BBS(BasicNewsRecipe):
|
||||
title = u'Arcadia'
|
||||
__author__ = 'Masahiro Hasegawa'
|
||||
language = 'ja'
|
||||
encoding = 'utf8'
|
||||
filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com',]
|
||||
filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com', ]
|
||||
timefmt = '[%Y/%m/%d]'
|
||||
remove_tags_before = dict(name='a', attrs={'name':'kiji'})
|
||||
remove_tags_before = dict(name='a', attrs={'name': 'kiji'})
|
||||
|
||||
sid_list = [] #some sotory id
|
||||
sid_list = [] # some sotory id
|
||||
|
||||
def parse_index(self):
|
||||
result = []
|
||||
@ -21,15 +22,12 @@ class Arcadia_BBS(BasicNewsRecipe):
|
||||
soup = self.index_to_soup(
|
||||
'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d'
|
||||
% sid)
|
||||
sec = soup.findAll('a', attrs={'href':re.compile(r'.*?kiji')})
|
||||
sec = soup.findAll('a', attrs={'href': re.compile(r'.*?kiji')})
|
||||
for s in sec[:-2]:
|
||||
s_result.append(dict(title=s.string,
|
||||
url="http://www.mai-net.net" + s['href'],
|
||||
date=s.parent.parent.parent.findAll('td')[3].string[:-6],
|
||||
date=s.parent.parent.parent.findAll('td')[
|
||||
3].string[:-6],
|
||||
description='', content=''))
|
||||
result.append((s_result[0]['title'], s_result))
|
||||
return result
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -10,30 +10,29 @@ import os
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
|
||||
|
||||
class Arcamax(BasicNewsRecipe):
|
||||
title = 'Arcamax'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = u'Family Friendly Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
||||
category = 'news, comics'
|
||||
language = 'en'
|
||||
use_embedded_content= False
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.arcamax.com/images/pub/amuse/leftcol/zits.jpg'
|
||||
|
||||
# ###### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ########
|
||||
# ###### USER PREFERENCES - SET COMICS AND NUMBER OF COMICS TO RETRIEVE ##
|
||||
num_comics_to_get = 7
|
||||
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
|
||||
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED
|
||||
# STRIPS
|
||||
|
||||
conversion_options = {'linearize_tables' : True
|
||||
, 'comment' : description
|
||||
, 'tags' : category
|
||||
, 'language' : language
|
||||
conversion_options = {'linearize_tables': True, 'comment': description, 'tags': category, 'language': language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='header', attrs={'class':'fn-content-header bluelabel'}),
|
||||
dict(name='figure', attrs={'class':['comic']}),
|
||||
dict(name='header', attrs={'class': 'fn-content-header bluelabel'}),
|
||||
dict(name='figure', attrs={'class': ['comic']}),
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
@ -93,18 +92,22 @@ class Arcamax(BasicNewsRecipe):
|
||||
num -= 1
|
||||
raw = self.index_to_soup(url, raw=True)
|
||||
self.panel_counter += 1
|
||||
path = os.path.join(self.panel_tdir, '%d.html' % self.panel_counter)
|
||||
path = os.path.join(self.panel_tdir, '%d.html' %
|
||||
self.panel_counter)
|
||||
with open(path, 'wb') as f:
|
||||
f.write(raw)
|
||||
soup = self.index_to_soup(raw)
|
||||
a = soup.find(name='a', attrs={'class':['prev']})
|
||||
a = soup.find(name='a', attrs={'class': ['prev']})
|
||||
prev_page_url = 'http://www.arcamax.com' + a['href']
|
||||
title = self.tag_to_string(soup.find('title')).partition('|')[0].strip()
|
||||
title = self.tag_to_string(
|
||||
soup.find('title')).partition('|')[0].strip()
|
||||
if 'for' not in title.split():
|
||||
title = title + ' for today'
|
||||
date = self.tag_to_string(soup.find(name='span', attrs={'class':['cur']}))
|
||||
date = self.tag_to_string(
|
||||
soup.find(name='span', attrs={'class': ['cur']}))
|
||||
self.log('\tFound:', title, 'at:', url)
|
||||
current_articles.append({'title': title, 'url':'file://' + path , 'description':'', 'date': date})
|
||||
current_articles.append(
|
||||
{'title': title, 'url': 'file://' + path, 'description': '', 'date': date})
|
||||
if self.test and len(current_articles) >= self.test[1]:
|
||||
break
|
||||
url = prev_page_url
|
||||
|
@ -1,20 +1,22 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Archeowiesci(BasicNewsRecipe):
|
||||
title = u'Archeowieści'
|
||||
__author__ = 'fenuks'
|
||||
category = 'archeology'
|
||||
language = 'pl'
|
||||
description = u'Z pasją o przeszłości'
|
||||
cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
|
||||
cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
|
||||
oldest_article = 7
|
||||
needs_subscription='optional'
|
||||
needs_subscription = 'optional'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
|
||||
remove_tags = [
|
||||
dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']})]
|
||||
feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
|
||||
|
||||
def parse_feeds (self):
|
||||
def parse_feeds(self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
|
@ -10,6 +10,7 @@ import time
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class ArgNoticias(BasicNewsRecipe):
|
||||
title = 'ARG Noticias'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -28,27 +29,27 @@ class ArgNoticias(BasicNewsRecipe):
|
||||
extra_css = ''
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['itemHeader','itemBody','itemAuthorBlock']})]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class': ['itemHeader', 'itemBody', 'itemAuthorBlock']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','base','iframe']),
|
||||
dict(name='div', attrs={'class':['b2jsocial_parent','itemSocialSharing']})
|
||||
dict(name=['object', 'link', 'base', 'iframe']),
|
||||
dict(name='div', attrs={
|
||||
'class': ['b2jsocial_parent', 'itemSocialSharing']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Politica' , u'http://www.argnoticias.com/index.php/politica' )
|
||||
,(u'Economia' , u'http://www.argnoticias.com/index.php/economia' )
|
||||
,(u'Sociedad' , u'http://www.argnoticias.com/index.php/sociedad' )
|
||||
,(u'Mundo' , u'http://www.argnoticias.com/index.php/mundo' )
|
||||
,(u'Deportes' , u'http://www.argnoticias.com/index.php/deportes' )
|
||||
,(u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos')
|
||||
,(u'Tendencias' , u'http://www.argnoticias.com/index.php/tendencias' )
|
||||
|
||||
(u'Politica', u'http://www.argnoticias.com/index.php/politica'),
|
||||
(u'Economia', u'http://www.argnoticias.com/index.php/economia'),
|
||||
(u'Sociedad', u'http://www.argnoticias.com/index.php/sociedad'),
|
||||
(u'Mundo', u'http://www.argnoticias.com/index.php/mundo'),
|
||||
(u'Deportes', u'http://www.argnoticias.com/index.php/deportes'),
|
||||
(u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos'),
|
||||
(u'Tendencias', u'http://www.argnoticias.com/index.php/tendencias')
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
@ -57,40 +58,37 @@ class ArgNoticias(BasicNewsRecipe):
|
||||
checker = []
|
||||
for feedobj in lfeeds:
|
||||
feedtitle, feedurl = feedobj
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||
self.report_progress(0, _('Fetching feed') + ' %s...' %
|
||||
(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
soup = self.index_to_soup(feedurl)
|
||||
for item in soup.findAll('div', attrs={'class':'Nota'}):
|
||||
atag = item.find('a', attrs={'class':'moduleItemTitle'})
|
||||
ptag = item.find('div', attrs={'class':'moduleItemIntrotext'})
|
||||
for item in soup.findAll('div', attrs={'class': 'Nota'}):
|
||||
atag = item.find('a', attrs={'class': 'moduleItemTitle'})
|
||||
ptag = item.find('div', attrs={'class': 'moduleItemIntrotext'})
|
||||
url = self.INDEX + atag['href']
|
||||
title = self.tag_to_string(atag)
|
||||
description = self.tag_to_string(ptag)
|
||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
||||
if url not in checker:
|
||||
checker.append(url)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
'title': title, 'date': date, 'url': url, 'description': description
|
||||
})
|
||||
|
||||
for item in soup.findAll('li'):
|
||||
atag = item.find('a', attrs={'class':'moduleItemTitle'})
|
||||
atag = item.find('a', attrs={'class': 'moduleItemTitle'})
|
||||
if atag:
|
||||
ptag = item.find('div', attrs={'class':'moduleItemIntrotext'})
|
||||
ptag = item.find(
|
||||
'div', attrs={'class': 'moduleItemIntrotext'})
|
||||
url = self.INDEX + atag['href']
|
||||
title = self.tag_to_string(atag)
|
||||
description = self.tag_to_string(ptag)
|
||||
date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime())
|
||||
date = strftime(
|
||||
"%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
|
||||
if url not in checker:
|
||||
checker.append(url)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
'title': title, 'date': date, 'url': url, 'description': description
|
||||
})
|
||||
totalfeeds.append((feedtitle, articles))
|
||||
return totalfeeds
|
||||
|
@ -5,11 +5,12 @@ azrepublic.com
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
|
||||
title = u'AZRepublic'
|
||||
__author__ = 'Jim Olo'
|
||||
language = 'en'
|
||||
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years"
|
||||
description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years" # noqa
|
||||
publisher = 'AZRepublic/AZCentral'
|
||||
masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
|
||||
cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'
|
||||
@ -21,31 +22,43 @@ class AdvancedUserRecipe1307301031(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }'
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa
|
||||
|
||||
remove_attributes = ['width','height','h2','subHeadline','style']
|
||||
remove_attributes = ['width', 'height', 'h2', 'subHeadline', 'style']
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
|
||||
dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
|
||||
dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
|
||||
dict(name='div', attrs={'id':['nav', 'mp', 'subnav', 'jobsDrop']}),
|
||||
dict(name='h6', attrs={'class':['section-header']}),
|
||||
dict(name='a', attrs={'href':['#comments']}),
|
||||
dict(name='div', attrs={'class':['articletools clearfix', 'floatRight']}),
|
||||
dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
|
||||
dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
|
||||
dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}),
|
||||
dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
|
||||
dict(name='div', attrs={'id':['ttdHeader', 'ttdTimeWeather']}),
|
||||
dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}),
|
||||
dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
|
||||
dict(name='h1', attrs={'id':['SEOtext']}),
|
||||
dict(name='table', attrs={'class':['ap-mediabox-table']}),
|
||||
dict(name='p', attrs={'class':['ap_para']}),
|
||||
dict(name='span', attrs={'class':['source-org vcard', 'org fn']}),
|
||||
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}),
|
||||
dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}),
|
||||
dict(name='div', attrs={'id':['onespot_nextclick']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}),
|
||||
dict(name='div', attrs={'id': ['nav', 'mp', 'subnav', 'jobsDrop']}),
|
||||
dict(name='h6', attrs={'class': ['section-header']}),
|
||||
dict(name='a', attrs={'href': ['#comments']}),
|
||||
dict(name='div', attrs={
|
||||
'class': ['articletools clearfix', 'floatRight']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['blogsHed', 'blog_comments', 'blogByline', 'blogTopics']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}),
|
||||
dict(name='div', attrs={'id': ['ttdHeader', 'ttdTimeWeather']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['membersRightMain', 'deals-header-wrap']}),
|
||||
dict(name='div', attrs={
|
||||
'id': ['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}),
|
||||
dict(name='h1', attrs={'id': ['SEOtext']}),
|
||||
dict(name='table', attrs={'class': ['ap-mediabox-table']}),
|
||||
dict(name='p', attrs={'class': ['ap_para']}),
|
||||
dict(name='span', attrs={'class': ['source-org vcard', 'org fn']}),
|
||||
dict(name='a', attrs={
|
||||
'href': ['http://hosted2.ap.org/APDEFAULT/privacy']}),
|
||||
dict(name='a', attrs={
|
||||
'href': ['http://hosted2.ap.org/APDEFAULT/terms']}),
|
||||
dict(name='div', attrs={'id': ['onespot_nextclick']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
@ -62,7 +75,3 @@ class AdvancedUserRecipe1307301031(BasicNewsRecipe):
|
||||
(u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
|
||||
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646')
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ArmyTimes(BasicNewsRecipe):
|
||||
title = 'Army Times'
|
||||
__author__ = 'jde'
|
||||
@ -11,7 +13,7 @@ class ArmyTimes(BasicNewsRecipe):
|
||||
tags = 'news, U.S. Army'
|
||||
cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
||||
masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
|
||||
oldest_article = 7 #days
|
||||
oldest_article = 7 # days
|
||||
max_articles_per_feed = 25
|
||||
publication_type = 'newspaper'
|
||||
no_stylesheets = True
|
||||
@ -23,20 +25,15 @@ class ArmyTimes(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
|
||||
('News', 'http://www.armytimes.com/rss_news.php'),
|
||||
('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
|
||||
('Money', 'http://www.armytimes.com/rss_money.php'),
|
||||
('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
|
||||
('Community', 'http://www.armytimes.com/rss_community.php'),
|
||||
('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
|
||||
('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
|
||||
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
|
||||
('News', 'http://www.armytimes.com/rss_news.php'),
|
||||
('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
|
||||
('Money', 'http://www.armytimes.com/rss_money.php'),
|
||||
('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
|
||||
('Community', 'http://www.armytimes.com/rss_community.php'),
|
||||
('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
|
||||
('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
|
||||
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
|
||||
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@ __description__ = 'Get some fresh news from Arrêt sur images'
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Asi(BasicNewsRecipe):
|
||||
|
||||
title = 'Arrêt sur images'
|
||||
@ -32,9 +33,10 @@ class Asi(BasicNewsRecipe):
|
||||
('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
|
||||
]
|
||||
|
||||
conversion_options = { 'smarten_punctuation' : True }
|
||||
conversion_options = {'smarten_punctuation': True}
|
||||
|
||||
remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'), dict(name='div', attrs={'class':'bloc-chroniqueur-2'}), dict(id='footercontainer')]
|
||||
remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'),
|
||||
dict(name='div', attrs={'class': 'bloc-chroniqueur-2'}), dict(id='footercontainer')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('contenu.php', 'contenu-imprimable.php')
|
||||
@ -51,4 +53,3 @@ class Asi(BasicNewsRecipe):
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
|
@ -7,6 +7,7 @@ arstechnica.com
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
|
||||
class ArsTechnica(BasicNewsRecipe):
|
||||
title = u'Ars Technica'
|
||||
language = 'en'
|
||||
@ -31,56 +32,48 @@ class ArsTechnica(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'class':'standalone'})
|
||||
,dict(attrs={'id':'article-guts'})
|
||||
dict(attrs={'class': 'standalone'}), dict(attrs={'id': 'article-guts'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','embed','iframe','meta'])
|
||||
,dict(attrs={'class':'corner-info'})
|
||||
,dict(attrs={'id': 'article-footer-wrap'})
|
||||
,dict(attrs={'class': 'article-expander'})
|
||||
,dict(name='nav',attrs={'class': 'subheading'})
|
||||
dict(name=['object', 'link', 'embed', 'iframe', 'meta']), dict(attrs={'class': 'corner-info'}), dict(attrs={
|
||||
'id': 'article-footer-wrap'}), dict(attrs={'class': 'article-expander'}), dict(name='nav', attrs={'class': 'subheading'})
|
||||
]
|
||||
remove_attributes = ['lang']
|
||||
|
||||
feeds = [
|
||||
|
||||
(u'Ars Features (All our long-form feature articles)' , u'http://feeds.arstechnica.com/arstechnica/features')
|
||||
, (u'Technology Lab (Information Technology)' , u'http://feeds.arstechnica.com/arstechnica/technology-lab')
|
||||
,(u'Gear & Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets')
|
||||
,(u'Ministry of Innovation (Business of Technology)' , u'http://feeds.arstechnica.com/arstechnica/business')
|
||||
,(u'Risk Assessment (Security & Hacktivism)' , u'http://feeds.arstechnica.com/arstechnica/security')
|
||||
,(u'Law & Disorder (Civilizations & Discontents)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy')
|
||||
,(u'Infinite Loop (Apple Ecosystem)' , u'http://feeds.arstechnica.com/arstechnica/apple')
|
||||
,(u'Opposable Thumbs (Gaming & Entertainment)' , u'http://feeds.arstechnica.com/arstechnica/gaming')
|
||||
,(u'Scientific Method (Science & Exploration)' , u'http://feeds.arstechnica.com/arstechnica/science')
|
||||
,(u'Multiverse (Exploratoins & Meditations on Sci-Fi)' , u'http://feeds.arstechnica.com/arstechnica/multiverse')
|
||||
,(u'Cars Technica (All Things Automotive)' , u'http://feeds.arstechnica.com/arstechnica/cars')
|
||||
,(u'Staff Blogs (From the Minds of Ars)' , u'http://feeds.arstechnica.com/arstechnica/staff-blogs')
|
||||
(u'Ars Features (All our long-form feature articles)', u'http://feeds.arstechnica.com/arstechnica/features'),
|
||||
(u'Technology Lab (Information Technology)', u'http://feeds.arstechnica.com/arstechnica/technology-lab'),
|
||||
(u'Gear & Gadgets', u'http://feeds.arstechnica.com/arstechnica/gadgets'),
|
||||
(u'Ministry of Innovation (Business of Technology)', u'http://feeds.arstechnica.com/arstechnica/business'),
|
||||
(u'Risk Assessment (Security & Hacktivism)', u'http://feeds.arstechnica.com/arstechnica/security'),
|
||||
(u'Law & Disorder (Civilizations & Discontents)', u'http://feeds.arstechnica.com/arstechnica/tech-policy'),
|
||||
(u'Infinite Loop (Apple Ecosystem)', u'http://feeds.arstechnica.com/arstechnica/apple'),
|
||||
(u'Opposable Thumbs (Gaming & Entertainment)', u'http://feeds.arstechnica.com/arstechnica/gaming'),
|
||||
(u'Scientific Method (Science & Exploration)', u'http://feeds.arstechnica.com/arstechnica/science'),
|
||||
(u'Multiverse (Exploratoins & Meditations on Sci-Fi)', u'http://feeds.arstechnica.com/arstechnica/multiverse'),
|
||||
(u'Cars Technica (All Things Automotive)', u'http://feeds.arstechnica.com/arstechnica/cars'),
|
||||
(u'Staff Blogs (From the Minds of Ars)', u'http://feeds.arstechnica.com/arstechnica/staff-blogs')
|
||||
]
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find(attrs={'class':'numbers'})
|
||||
pager = soup.find(attrs={'class': 'numbers'})
|
||||
if pager:
|
||||
nexttag = pager.find(attrs={'class':'next'})
|
||||
nexttag = pager.find(attrs={'class': 'next'})
|
||||
if nexttag:
|
||||
nurl = nexttag.parent['href']
|
||||
rawc = self.index_to_soup(nurl,True)
|
||||
rawc = self.index_to_soup(nurl, True)
|
||||
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
||||
texttag = soup2.find(attrs={'id':'article-guts'})
|
||||
texttag = soup2.find(attrs={'id': 'article-guts'})
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
self.append_page(soup2, texttag, newpos)
|
||||
texttag.extract()
|
||||
pager.extract()
|
||||
appendtag.insert(position,texttag)
|
||||
appendtag.insert(position, texttag)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body, 3)
|
||||
@ -102,4 +95,4 @@ class ArsTechnica(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
return '<html><head>'+raw[raw.find('</head>'):]
|
||||
return '<html><head>' + raw[raw.find('</head>'):]
|
||||
|
@ -1,20 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
|
||||
title = u'Asco de vida'
|
||||
language = 'es'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 1 #days
|
||||
oldest_article = 1 # days
|
||||
max_articles_per_feed = 25
|
||||
#encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
keep_only_tags = dict(name='div', attrs={'class':'box story'})
|
||||
|
||||
keep_only_tags = dict(name='div', attrs={'class': 'box story'})
|
||||
|
||||
feeds = [
|
||||
('News',
|
||||
('News',
|
||||
'http://feeds2.feedburner.com/AscoDeVida'),
|
||||
]
|
||||
|
||||
]
|
||||
|
@ -8,6 +8,7 @@ asiaone.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AsiaOne(BasicNewsRecipe):
|
||||
title = u'AsiaOne'
|
||||
oldest_article = 2
|
||||
@ -17,10 +18,11 @@ class AsiaOne(BasicNewsRecipe):
|
||||
no_stylesheets = False
|
||||
language = 'en_SG'
|
||||
remove_javascript = True
|
||||
remove_tags = [dict(name='span', attrs={'class':'footer'})]
|
||||
remove_tags = [dict(name='span', attrs={'class': 'footer'})]
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'headline'}),
|
||||
dict(name='div', attrs={'class':['article-content','person-info row']})
|
||||
dict(name='h1', attrs={'class': 'headline'}),
|
||||
dict(name='div', attrs={
|
||||
'class': ['article-content', 'person-info row']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -7,10 +7,11 @@ www.asianreviewofbooks.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
title = 'The Asian Review of Books'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.'
|
||||
description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.' # noqa
|
||||
publisher = 'The Asian Review of Books'
|
||||
category = 'literature, books, reviews, Asia'
|
||||
oldest_article = 30
|
||||
@ -31,16 +32,12 @@ class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
|
||||
remove_tags = [dict(name=['object','script','iframe','embed'])]
|
||||
remove_tags = [dict(name=['object', 'script', 'iframe', 'embed'])]
|
||||
remove_attributes = ['style', 'onclick']
|
||||
feeds = [(u'Articles' , u'http://www.asianreviewofbooks.com/new/rss.php')]
|
||||
feeds = [(u'Articles', u'http://www.asianreviewofbooks.com/new/rss.php')]
|
||||
|
||||
def print_version(self, url):
|
||||
root, sep, artid = url.rpartition('?ID=')
|
||||
@ -48,4 +45,3 @@ class AsianReviewOfBooks(BasicNewsRecipe):
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
return '<html><head><title>title</title></head><body>' + raw + '</body></html>'
|
||||
|
||||
|
@ -1,17 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AstroNEWS(BasicNewsRecipe):
|
||||
title = u'AstroNEWS'
|
||||
__author__ = 'fenuks'
|
||||
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.'
|
||||
description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' # noqa
|
||||
category = 'astronomy, science'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
#extra_css= 'table {text-align: left;}'
|
||||
no_stylesheets=True
|
||||
cover_url='http://news.astronet.pl/img/logo_news.jpg'
|
||||
no_stylesheets = True
|
||||
cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
|
||||
remove_attributes = ['width', 'align']
|
||||
remove_tags=[dict(name='hr')]
|
||||
remove_tags = [dict(name='hr')]
|
||||
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -1,11 +1,12 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Astroflesz(BasicNewsRecipe):
|
||||
title = u'Astroflesz'
|
||||
oldest_article = 7
|
||||
__author__ = 'fenuks'
|
||||
description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
|
||||
description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne' # noqa
|
||||
category = 'astronomy'
|
||||
language = 'pl'
|
||||
cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
|
||||
@ -16,12 +17,13 @@ class Astroflesz(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
remove_attributes = ['style']
|
||||
keep_only_tags = [dict(id="k2Container")]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
|
||||
remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'itemLinks'})
|
||||
remove_tags = [dict(name='div', attrs={
|
||||
'class': ['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
|
||||
feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
t = soup.find(attrs={'class':'itemIntroText'})
|
||||
t = soup.find(attrs={'class': 'itemIntroText'})
|
||||
if t:
|
||||
for i in t.findAll('img'):
|
||||
i['style'] = 'float: left; margin-right: 5px;'
|
||||
|
@ -6,6 +6,7 @@ www.athensnews.gr
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AthensNews(BasicNewsRecipe):
|
||||
title = 'Athens News'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -30,35 +31,31 @@ class AthensNews(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||
}
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['meta','link'])
|
||||
dict(name=['meta', 'link'])
|
||||
]
|
||||
keep_only_tags=[
|
||||
dict(name='span',attrs={'class':'big'})
|
||||
,dict(name='td', attrs={'class':['articlepubdate','text']})
|
||||
keep_only_tags = [
|
||||
dict(name='span', attrs={'class': 'big'}), dict(
|
||||
name='td', attrs={'class': ['articlepubdate', 'text']})
|
||||
]
|
||||
remove_attributes=['lang']
|
||||
|
||||
remove_attributes = ['lang']
|
||||
|
||||
feeds = [
|
||||
(u'News' , u'http://www.athensnews.gr/category/1/feed' )
|
||||
,(u'Politics' , u'http://www.athensnews.gr/category/8/feed' )
|
||||
,(u'Business' , u'http://www.athensnews.gr/category/2/feed' )
|
||||
,(u'Economy' , u'http://www.athensnews.gr/category/11/feed')
|
||||
,(u'Community' , u'http://www.athensnews.gr/category/5/feed' )
|
||||
,(u'Arts' , u'http://www.athensnews.gr/category/3/feed' )
|
||||
,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' )
|
||||
,(u'Sports' , u'http://www.athensnews.gr/category/4/feed' )
|
||||
,(u'Travel' , u'http://www.athensnews.gr/category/6/feed' )
|
||||
,(u'Letters' , u'http://www.athensnews.gr/category/44/feed')
|
||||
,(u'Media' , u'http://www.athensnews.gr/multimedia/feed' )
|
||||
|
||||
(u'News', u'http://www.athensnews.gr/category/1/feed'),
|
||||
(u'Politics', u'http://www.athensnews.gr/category/8/feed'),
|
||||
(u'Business', u'http://www.athensnews.gr/category/2/feed'),
|
||||
(u'Economy', u'http://www.athensnews.gr/category/11/feed'),
|
||||
(u'Community', u'http://www.athensnews.gr/category/5/feed'),
|
||||
(u'Arts', u'http://www.athensnews.gr/category/3/feed'),
|
||||
(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed'),
|
||||
(u'Sports', u'http://www.athensnews.gr/category/4/feed'),
|
||||
(u'Travel', u'http://www.athensnews.gr/category/6/feed'),
|
||||
(u'Letters', u'http://www.athensnews.gr/category/44/feed'),
|
||||
(u'Media', u'http://www.athensnews.gr/multimedia/feed')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -9,9 +9,11 @@ import html5lib
|
||||
from lxml import html
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
|
||||
q = frozenset(classes.split(' '))
|
||||
return dict(attrs={'class':lambda x:x and frozenset(x.split()).intersection(q)})
|
||||
return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
class TheAtlantic(BasicNewsRecipe):
|
||||
|
||||
@ -23,13 +25,14 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
|
||||
keep_only_tags = [
|
||||
classes('article-header article-body article-magazine metadata article-cover-content lead-img'),
|
||||
classes(
|
||||
'article-header article-body article-magazine metadata article-cover-content lead-img'),
|
||||
]
|
||||
remove_tags = [
|
||||
{'name': ['meta', 'link', 'noscript']},
|
||||
{'attrs':{'class':['offset-wrapper', 'ad-boxfeatures-wrapper']}},
|
||||
{'attrs':{'class':lambda x: x and 'article-tools' in x}},
|
||||
{'src':lambda x:x and 'spotxchange.com' in x},
|
||||
{'attrs': {'class': ['offset-wrapper', 'ad-boxfeatures-wrapper']}},
|
||||
{'attrs': {'class': lambda x: x and 'article-tools' in x}},
|
||||
{'src': lambda x: x and 'spotxchange.com' in x},
|
||||
]
|
||||
remove_tags_after = classes('article-body')
|
||||
|
||||
@ -48,7 +51,7 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
return url + '?single_page=true'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for img in soup.findAll('img', attrs={'data-src':True}):
|
||||
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||
img['src'] = img['data-src']
|
||||
return soup
|
||||
|
||||
@ -61,8 +64,8 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
self.cover_url = img['src']
|
||||
current_section, current_articles = 'Cover Story', []
|
||||
feeds = []
|
||||
for div in soup.findAll('div', attrs={'class':lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
|
||||
for h2 in div.findAll('h2', attrs={'class':True}):
|
||||
for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
|
||||
for h2 in div.findAll('h2', attrs={'class': True}):
|
||||
if 'section-name' in h2['class'].split():
|
||||
if current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
@ -75,18 +78,22 @@ class TheAtlantic(BasicNewsRecipe):
|
||||
url = a['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.theatlantic.com' + url
|
||||
li = a.findParent('li', attrs={'class':lambda x: x and 'article' in x.split()})
|
||||
li = a.findParent(
|
||||
'li', attrs={'class': lambda x: x and 'article' in x.split()})
|
||||
desc = ''
|
||||
dek = li.find(attrs={'class':lambda x:x and 'dek' in x.split()})
|
||||
dek = li.find(
|
||||
attrs={'class': lambda x: x and 'dek' in x.split()})
|
||||
if dek is not None:
|
||||
desc += self.tag_to_string(dek)
|
||||
byline = li.find(attrs={'class':lambda x:x and 'byline' in x.split()})
|
||||
byline = li.find(
|
||||
attrs={'class': lambda x: x and 'byline' in x.split()})
|
||||
if byline is not None:
|
||||
desc += ' -- ' + self.tag_to_string(byline)
|
||||
self.log('\t', title, 'at', url)
|
||||
if desc:
|
||||
self.log('\t\t', desc)
|
||||
current_articles.append({'title':title, 'url':url, 'description':desc})
|
||||
current_articles.append(
|
||||
{'title': title, 'url': url, 'description': desc})
|
||||
if current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
return feeds
|
||||
|
@ -3,6 +3,7 @@
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1421956712(BasicNewsRecipe):
|
||||
title = 'TheAtlantic.com'
|
||||
__author__ = 'ebrandon'
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AttacEspanaRecipe (BasicNewsRecipe):
|
||||
__author__ = 'Marc Busqué <marc@lamarciana.com>'
|
||||
__url__ = 'http://www.lamarciana.com'
|
||||
@ -9,7 +10,7 @@ class AttacEspanaRecipe (BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
|
||||
title = u'attac.es'
|
||||
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.'
|
||||
description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.' # noqa
|
||||
url = 'http://www.attac.es'
|
||||
language = 'es'
|
||||
tags = 'contrainformación, información alternativa'
|
||||
|
@ -11,13 +11,13 @@ http://www.corrieredellosport.it/
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Auto(BasicNewsRecipe):
|
||||
__author__ = 'Gabriele Marini'
|
||||
description = 'Auto and Formula 1'
|
||||
|
||||
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
|
||||
|
||||
|
||||
title = u'Auto'
|
||||
publisher = 'CONTE Editore'
|
||||
category = 'Sport'
|
||||
@ -34,30 +34,23 @@ class Auto(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
'--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \
|
||||
description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h2', attrs={'class':['tit_Article y_Txt']}),
|
||||
dict(name='h2', attrs={'class':['tit_Article']}),
|
||||
dict(name='div', attrs={'class':['box_Img newsdet_new ']}),
|
||||
dict(name='div', attrs={'class':['box_Img newsdet_as ']}),
|
||||
dict(name='table', attrs={'class':['table_A']}),
|
||||
dict(name='div', attrs={'class':['txt_Article txtBox_cms']}),
|
||||
dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
|
||||
dict(name='h2', attrs={'class': ['tit_Article']}),
|
||||
dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
|
||||
dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
|
||||
dict(name='table', attrs={'class': ['table_A']}),
|
||||
dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
|
||||
dict(name='testoscheda')]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Tutte le News' , u'http://www.auto.it/rss/articoli.xml' ),
|
||||
(u'Prove su Strada' , u'http://www.auto.it/rss/prove+6.xml'),
|
||||
(u'Novit\xe0' , u'http://www.auto.it/rss/novita+3.xml')
|
||||
(u'Tutte le News', u'http://www.auto.it/rss/articoli.xml'),
|
||||
(u'Prove su Strada', u'http://www.auto.it/rss/prove+6.xml'),
|
||||
(u'Novit\xe0', u'http://www.auto.it/rss/novita+3.xml')
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AutoBlog(BasicNewsRecipe):
|
||||
title = u'Auto Blog'
|
||||
__author__ = 'Welovelucy'
|
||||
@ -12,5 +13,3 @@ class AutoBlog(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url + 'print/'
|
||||
|
||||
|
||||
|
@ -11,13 +11,13 @@ http://www.corrieredellosport.it/
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AutoPR(BasicNewsRecipe):
|
||||
__author__ = 'Gabriele Marini'
|
||||
description = 'Auto and Formula 1'
|
||||
|
||||
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
|
||||
|
||||
|
||||
title = u'Auto Prove'
|
||||
publisher = 'CONTE Editore'
|
||||
category = 'Sport'
|
||||
@ -33,28 +33,26 @@ class AutoPR(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
#html2lrf_options = [
|
||||
# html2lrf_options = [
|
||||
# '--comment', description
|
||||
# , '--category', category
|
||||
# , '--publisher', publisher
|
||||
# , '--ignore-tables'
|
||||
# ]
|
||||
|
||||
#html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h2', attrs={'class':['tit_Article y_Txt']}),
|
||||
dict(name='h2', attrs={'class':['tit_Article']}),
|
||||
dict(name='div', attrs={'class':['box_Img newsdet_new ']}),
|
||||
dict(name='div', attrs={'class':['box_Img newsdet_as ']}),
|
||||
dict(name='table', attrs={'class':['table_A']}),
|
||||
dict(name='div', attrs={'class':['txt_Article txtBox_cms']}),
|
||||
dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
|
||||
dict(name='h2', attrs={'class': ['tit_Article']}),
|
||||
dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
|
||||
dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
|
||||
dict(name='table', attrs={'class': ['table_A']}),
|
||||
dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
|
||||
dict(name='testoscheda')]
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for title, url in [
|
||||
("Prove su Strada" , "http://www.auto.it/rss/prove+6.xml")
|
||||
("Prove su Strada", "http://www.auto.it/rss/prove+6.xml")
|
||||
]:
|
||||
soup = self.index_to_soup(url)
|
||||
soup = soup.find('channel')
|
||||
@ -74,17 +72,19 @@ class AutoPR(BasicNewsRecipe):
|
||||
def create_links_append(self, link, date, description):
|
||||
current_articles = []
|
||||
|
||||
current_articles.append({'title': 'Generale', 'url': link,'description':description, 'date':date}),
|
||||
current_articles.append({'title': 'Design', 'url': link.replace('scheda','design'),'description':'scheda', 'date':''}),
|
||||
current_articles.append({'title': 'Interni', 'url': link.replace('scheda','interni'),'description':'Interni', 'date':''}),
|
||||
current_articles.append({'title': 'Tecnica', 'url': link.replace('scheda','tecnica'),'description':'Tecnica', 'date':''}),
|
||||
current_articles.append({'title': 'Su Strada', 'url': link.replace('scheda','su_strada'),'description':'Su Strada', 'date':''}),
|
||||
current_articles.append({'title': 'Pagella', 'url': link.replace('scheda','pagella'),'description':'Pagella', 'date':''}),
|
||||
current_articles.append({'title': 'Rilevamenti', 'url': link.replace('scheda','telemetria'),'description':'Rilevamenti', 'date':''})
|
||||
current_articles.append(
|
||||
{'title': 'Generale', 'url': link, 'description': description, 'date': date}),
|
||||
current_articles.append({'title': 'Design', 'url': link.replace(
|
||||
'scheda', 'design'), 'description': 'scheda', 'date': ''}),
|
||||
current_articles.append({'title': 'Interni', 'url': link.replace(
|
||||
'scheda', 'interni'), 'description': 'Interni', 'date': ''}),
|
||||
current_articles.append({'title': 'Tecnica', 'url': link.replace(
|
||||
'scheda', 'tecnica'), 'description': 'Tecnica', 'date': ''}),
|
||||
current_articles.append({'title': 'Su Strada', 'url': link.replace(
|
||||
'scheda', 'su_strada'), 'description': 'Su Strada', 'date': ''}),
|
||||
current_articles.append({'title': 'Pagella', 'url': link.replace(
|
||||
'scheda', 'pagella'), 'description': 'Pagella', 'date': ''}),
|
||||
current_articles.append({'title': 'Rilevamenti', 'url': link.replace(
|
||||
'scheda', 'telemetria'), 'description': 'Rilevamenti', 'date': ''})
|
||||
|
||||
return current_articles
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@ auto-bild.ro
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AutoBild(BasicNewsRecipe):
|
||||
title = u'Auto Bild'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
@ -24,27 +25,21 @@ class AutoBild(BasicNewsRecipe):
|
||||
cover_url = 'http://www.auto-bild.ro/images/autobild.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'box_2 articol clearfix'})
|
||||
dict(name='div', attrs={'class': 'box_2 articol clearfix'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['detail']})
|
||||
, dict(name='a', attrs={'id':['zoom_link']})
|
||||
, dict(name='div', attrs={'class':['icons clearfix']})
|
||||
, dict(name='div', attrs={'class':['pub_articol clearfix']})
|
||||
dict(name='div', attrs={'class': ['detail']}), dict(name='a', attrs={'id': ['zoom_link']}), dict(
|
||||
name='div', attrs={'class': ['icons clearfix']}), dict(name='div', attrs={'class': ['pub_articol clearfix']})
|
||||
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':['pub_articol clearfix']})
|
||||
dict(name='div', attrs={'class': ['pub_articol clearfix']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -1,6 +1,7 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class autogids(BasicNewsRecipe):
|
||||
title = u'Automatiseringgids IT'
|
||||
oldest_article = 7
|
||||
@ -17,10 +18,10 @@ class autogids(BasicNewsRecipe):
|
||||
publication_type = 'newspaper'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['content']})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class': ['content']})]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
|
||||
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL | re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
|
||||
|
@ -9,22 +9,25 @@ www.autosport.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class autosport(BasicNewsRecipe):
|
||||
title = u'Autosport'
|
||||
__author__ = 'MrStefan <mrstefaan@gmail.com>'
|
||||
language = 'en_GB'
|
||||
description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
|
||||
masthead_url='http://cdn.images.autosport.com/asdotcom.gif'
|
||||
remove_empty_feeds= True
|
||||
description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...' # noqa
|
||||
masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'
|
||||
remove_empty_feeds = True
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
no_stylesheets=True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'news_headline'}))
|
||||
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'}))
|
||||
keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'}))
|
||||
keep_only_tags.append(dict(name = 'p'))
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name='h1', attrs={'class': 'news_headline'}))
|
||||
keep_only_tags.append(
|
||||
dict(name='td', attrs={'class': 'news_article_author'}))
|
||||
keep_only_tags.append(
|
||||
dict(name='td', attrs={'class': 'news_article_date'}))
|
||||
keep_only_tags.append(dict(name='p'))
|
||||
|
||||
feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')]
|
||||
|
@ -9,6 +9,7 @@ avantaje.ro
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Avantaje(BasicNewsRecipe):
|
||||
title = u'Avantaje'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
@ -24,29 +25,20 @@ class Avantaje(BasicNewsRecipe):
|
||||
cover_url = 'http://www.avantaje.ro/images/default/logo.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'articol'})
|
||||
, dict(name='div', attrs={'class':'gallery clearfix'})
|
||||
, dict(name='div', attrs={'align':'justify'})
|
||||
dict(name='div', attrs={'id': 'articol'}), dict(name='div', attrs={
|
||||
'class': 'gallery clearfix'}), dict(name='div', attrs={'align': 'justify'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['color_sanatate_box']})
|
||||
, dict(name='div', attrs={'class':['nav']})
|
||||
, dict(name='div', attrs={'class':['voteaza_art']})
|
||||
, dict(name='div', attrs={'class':['bookmark']})
|
||||
, dict(name='div', attrs={'class':['links clearfix']})
|
||||
, dict(name='div', attrs={'class':['title']})
|
||||
dict(name='div', attrs={'id': ['color_sanatate_box']}), dict(name='div', attrs={'class': ['nav']}), dict(name='div', attrs={'class': ['voteaza_art']}), dict(name='div', attrs={'class': ['bookmark']}), dict(name='div', attrs={'class': ['links clearfix']}), dict(name='div', attrs={'class': ['title']}) # noqa
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':['title']})
|
||||
dict(name='div', attrs={'class': ['title']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -9,6 +9,7 @@ aventurilapescuit.ro
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AventuriLaPescuit(BasicNewsRecipe):
|
||||
title = u'Aventuri La Pescuit'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
@ -24,23 +25,20 @@ class AventuriLaPescuit(BasicNewsRecipe):
|
||||
cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'Article'})
|
||||
dict(name='div', attrs={'id': 'Article'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['right option']})
|
||||
, dict(name='iframe', attrs={'scrolling':['no']})
|
||||
dict(name='div', attrs={'class': ['right option']}), dict(
|
||||
name='iframe', attrs={'scrolling': ['no']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='iframe', attrs={'scrolling':['no']})
|
||||
dict(name='iframe', attrs={'scrolling': ['no']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -4,6 +4,8 @@ __copyright__ = '2010, BlonG'
|
||||
avto-magazin.si
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Dnevnik(BasicNewsRecipe):
|
||||
title = u'Avto Magazin'
|
||||
__author__ = u'BlonG'
|
||||
@ -15,8 +17,7 @@ class Dnevnik(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
language = 'sl'
|
||||
|
||||
conversion_options = {'linearize_tables' : True}
|
||||
|
||||
conversion_options = {'linearize_tables': True}
|
||||
|
||||
cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'
|
||||
|
||||
@ -28,20 +29,19 @@ class Dnevnik(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'_iprom_inStream'}),
|
||||
# dict(name='div', attrs={'class':'entry-content'}),
|
||||
dict(name='div', attrs={'id': '_iprom_inStream'}),
|
||||
# dict(name='div', attrs={'class':'entry-content'}),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'voteConfirmation'}),
|
||||
dict(name='div', attrs={'id':'InsideVote'}),
|
||||
dict(name='div', attrs={'class':'Zone234'}),
|
||||
dict(name='div', attrs={'class':'Comments'}),
|
||||
dict(name='div', attrs={'class':'sorodneNovice'}),
|
||||
dict(name='div', attrs={'id':'footer'}),
|
||||
dict(name='div', attrs={'id': 'voteConfirmation'}),
|
||||
dict(name='div', attrs={'id': 'InsideVote'}),
|
||||
dict(name='div', attrs={'class': 'Zone234'}),
|
||||
dict(name='div', attrs={'class': 'Comments'}),
|
||||
dict(name='div', attrs={'class': 'sorodneNovice'}),
|
||||
dict(name='div', attrs={'id': 'footer'}),
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Novice', u'http://www.avto-magazin.si/rss/')
|
||||
]
|
||||
|
@ -6,6 +6,7 @@ axxon.com.ar
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Axxon_news(BasicNewsRecipe):
|
||||
title = 'Revista Axxon'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -21,20 +22,17 @@ class Axxon_news(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
publication_type = 'magazine'
|
||||
INDEX = 'http://axxon.com.ar/rev/'
|
||||
extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} '
|
||||
extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} ' # noqa
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
||||
remove_tags = [dict(name=['object','link','iframe','embed','img'])]
|
||||
remove_tags_after = [dict(attrs={'class':['editorial','correo','biografia','articulo']})]
|
||||
remove_attributes = ['width','height','font','border','align']
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
|
||||
remove_tags = [dict(name=['object', 'link', 'iframe', 'embed', 'img'])]
|
||||
remove_tags_after = [
|
||||
dict(attrs={'class': ['editorial', 'correo', 'biografia', 'articulo']})]
|
||||
remove_attributes = ['width', 'height', 'font', 'border', 'align']
|
||||
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
@ -44,21 +42,16 @@ class Axxon_news(BasicNewsRecipe):
|
||||
description = ''
|
||||
title_prefix = ''
|
||||
feed_link = item.find('a')
|
||||
if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='):
|
||||
if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='): # noqa
|
||||
url = self.INDEX + feed_link['href']
|
||||
title = title_prefix + self.tag_to_string(feed_link)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
'title': title, 'date': date, 'url': url, 'description': description
|
||||
})
|
||||
return [(soup.head.title.string, articles)]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
@ -8,6 +8,7 @@ axxon.com.ar
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
|
||||
class Axxon_news(BasicNewsRecipe):
|
||||
title = 'Axxon noticias'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -23,40 +24,34 @@ class Axxon_news(BasicNewsRecipe):
|
||||
lang = 'es-AR'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
||||
|
||||
remove_tags = [dict(name=['object','link','iframe','embed'])]
|
||||
remove_tags = [dict(name=['object', 'link', 'iframe', 'embed'])]
|
||||
|
||||
feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')]
|
||||
|
||||
remove_attributes = ['style','width','height','font','border','align']
|
||||
|
||||
remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align']
|
||||
|
||||
def adeify_images2(cls, soup):
|
||||
for item in soup.findAll('img'):
|
||||
for attrib in ['height','width','border','align','style']:
|
||||
if item.has_key(attrib):
|
||||
for attrib in ['height', 'width', 'border', 'align', 'style']:
|
||||
if item.has_key(attrib): # noqa
|
||||
del item[attrib]
|
||||
oldParent = item.parent
|
||||
if oldParent.name == 'a':
|
||||
oldParent.name == 'p'
|
||||
myIndex = oldParent.contents.index(item)
|
||||
brtag = Tag(soup,'br')
|
||||
oldParent.insert(myIndex+1,brtag)
|
||||
brtag = Tag(soup, 'br')
|
||||
oldParent.insert(myIndex + 1, brtag)
|
||||
return soup
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.html.insert(0,mlang)
|
||||
mlang = Tag(soup, 'meta', [
|
||||
("http-equiv", "Content-Language"), ("content", self.lang)])
|
||||
soup.html.insert(0, mlang)
|
||||
return self.adeify_images2(soup)
|
||||
|
||||
|
@ -7,6 +7,7 @@ azstarnet.com
|
||||
import urllib
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Azstarnet(BasicNewsRecipe):
|
||||
title = 'Arizona Daily Star'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -23,36 +24,29 @@ class Azstarnet(BasicNewsRecipe):
|
||||
needs_subscription = True
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
||||
}
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://azstarnet.com/')
|
||||
if self.username is not None and self.password is not None:
|
||||
data = urllib.urlencode({ 'm':'login'
|
||||
,'u':self.username
|
||||
,'p':self.password
|
||||
,'z':'http://azstarnet.com/'
|
||||
data = urllib.urlencode({'m': 'login', 'u': self.username, 'p': self.password, 'z': 'http://azstarnet.com/'
|
||||
})
|
||||
br.open('http://azstarnet.com/app/registration/proxy.php',data)
|
||||
br.open('http://azstarnet.com/app/registration/proxy.php', data)
|
||||
return br
|
||||
|
||||
remove_tags = [dict(name=['object','link','iframe','base','img'])]
|
||||
|
||||
remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])]
|
||||
|
||||
feeds = [
|
||||
(u'Local News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
|
||||
,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc')
|
||||
,(u'World News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc')
|
||||
,(u'Sports' , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc')
|
||||
,(u'Opinion' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc')
|
||||
,(u'Movies' , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc')
|
||||
,(u'Food' , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
|
||||
|
||||
(u'Local News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc'),
|
||||
(u'National News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc'),
|
||||
(u'World News', u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc'),
|
||||
(u'Sports', u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc'),
|
||||
(u'Opinion', u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc'),
|
||||
(u'Movies', u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc'),
|
||||
(u'Food', u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -62,4 +56,3 @@ class Azstarnet(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?print=1'
|
||||
|
||||
|
@ -8,6 +8,7 @@ b365.realitatea.net
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class b365Realitatea(BasicNewsRecipe):
|
||||
title = u'b365 Realitatea'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
@ -23,25 +24,20 @@ class b365Realitatea(BasicNewsRecipe):
|
||||
cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'newsArticle'})
|
||||
dict(name='div', attrs={'class': 'newsArticle'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'date'})
|
||||
, dict(name='dic', attrs={'class':'addthis_toolbox addthis_default_style'})
|
||||
, dict(name='div', attrs={'class':'related_posts'})
|
||||
, dict(name='div', attrs={'id':'RelevantiWidget'})
|
||||
dict(name='div', attrs={'class': 'date'}), dict(name='dic', attrs={'class': 'addthis_toolbox addthis_default_style'}), dict(
|
||||
name='div', attrs={'class': 'related_posts'}), dict(name='div', attrs={'id': 'RelevantiWidget'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'RelevantiWidget'})
|
||||
dict(name='div', attrs={'id': 'RelevantiWidget'})
|
||||
]
|
||||
feeds = [
|
||||
(u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
|
||||
@ -49,4 +45,3 @@ class b365Realitatea(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
@ -7,6 +7,7 @@ b92.net
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class B92(BasicNewsRecipe):
|
||||
title = 'B92'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -30,33 +31,32 @@ class B92(BasicNewsRecipe):
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher': publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
|
||||
}
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(u'\u0110'), lambda match: u'\u00D0'),
|
||||
(re.compile(r'<html.*?<body>', re.DOTALL|re.IGNORECASE), lambda match: '<html><head><title>something</title></head><body>')
|
||||
(re.compile(r'<html.*?<body>', re.DOTALL | re.IGNORECASE),
|
||||
lambda match: '<html><head><title>something</title></head><body>')
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
|
||||
remove_attributes = ['width','height','align','hspace','vspace','border','lang','xmlns:fb']
|
||||
keep_only_tags = [dict(attrs={'class': ['article-info1', 'article-text']})]
|
||||
remove_attributes = ['width', 'height', 'align',
|
||||
'hspace', 'vspace', 'border', 'lang', 'xmlns:fb']
|
||||
remove_tags = [
|
||||
dict(name=['embed','link','base','meta','iframe'])
|
||||
,dict(attrs={'id':'social'})
|
||||
dict(name=['embed', 'link', 'base', 'meta', 'iframe']), dict(
|
||||
attrs={'id': 'social'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
|
||||
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
||||
,(u'Sport' , u'http://www.b92.net/info/rss/sport.xml' )
|
||||
,(u'Zivot' , u'http://www.b92.net/info/rss/zivot.xml' )
|
||||
,(u'Kultura' , u'http://www.b92.net/info/rss/kultura.xml' )
|
||||
,(u'Automobili' , u'http://www.b92.net/info/rss/automobili.xml')
|
||||
,(u'Tehnopolis' , u'http://www.b92.net/info/rss/tehnopolis.xml')
|
||||
|
||||
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml'),
|
||||
(u'Biz', u'http://www.b92.net/info/rss/biz.xml'),
|
||||
(u'Sport', u'http://www.b92.net/info/rss/sport.xml'),
|
||||
(u'Zivot', u'http://www.b92.net/info/rss/zivot.xml'),
|
||||
(u'Kultura', u'http://www.b92.net/info/rss/kultura.xml'),
|
||||
(u'Automobili', u'http://www.b92.net/info/rss/automobili.xml'),
|
||||
(u'Tehnopolis', u'http://www.b92.net/info/rss/tehnopolis.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user