'
'''
clarin.com
'''
@@ -8,9 +8,9 @@ clarin.com
from calibre.web.feeds.news import BasicNewsRecipe
class Clarin(BasicNewsRecipe):
- title = 'Clarin'
+ title = 'Clarín'
__author__ = 'Darko Miletic'
- description = 'Noticias de Argentina y mundo'
+ description = 'Clarin.com. Noticias de la Argentina y el mundo. Información actualizada las 24 horas y en español. Informate ya'
publisher = 'Grupo Clarin'
category = 'news, politics, Argentina'
oldest_article = 2
@@ -23,12 +23,10 @@ class Clarin(BasicNewsRecipe):
publication_type = 'newspaper'
INDEX = 'http://www.clarin.com'
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
- extra_css = """
- body{font-family: Arial,Helvetica,sans-serif}
- h2{font-family: Georgia,serif; font-size: xx-large}
- .hora{font-weight:bold}
- .hd p{font-size: small}
- .nombre-autor{color: #0F325A}
+ extra_css = """
+ body{font-family: Arial,Helvetica,sans-serif}
+ h2{font-family: Georgia,serif; font-size: xx-large}
+ .info,.nombre-autor,.hora{font-size: small}
"""
conversion_options = {
@@ -38,38 +36,35 @@ class Clarin(BasicNewsRecipe):
, 'language' : language
}
- keep_only_tags = [dict(attrs={'class':['hd','mt']})]
- remove_tags = [dict(name=['meta','base','link'])]
- remove_attributes = ['lang','_mce_bogus']
+ keep_only_tags = [dict(attrs={'class':['hd','mt','bd']})]
+ remove_tags = [dict(name=['meta','base','link','iframe','embed','object'])]
+ remove_attributes = ['lang']
feeds = [
(u'Pagina principal', u'http://www.clarin.com/rss/' )
,(u'Politica' , u'http://www.clarin.com/rss/politica/' )
,(u'Deportes' , u'http://www.clarin.com/rss/deportes/' )
- ,(u'Economia' , u'http://www.clarin.com/economia/' )
,(u'Mundo' , u'http://www.clarin.com/rss/mundo/' )
+ ,(u'iEco' , u'http://www.ieco.clarin.com/rss/' )
,(u'Espectaculos' , u'http://www.clarin.com/rss/espectaculos/')
,(u'Sociedad' , u'http://www.clarin.com/rss/sociedad/' )
,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' )
,(u'Policiales' , u'http://www.clarin.com/rss/policiales/' )
,(u'Internet' , u'http://www.clarin.com/rss/internet/' )
- ,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' )
]
-
- def get_article_url(self, article):
- return article.get('guid', None)
-
def print_version(self, url):
return url + '?print=1'
+ def get_article_url(self, article):
+ return article.get('guid', None)
+
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.INDEX)
- cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
- if cover_item:
- ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
- if ap:
- cover_url = self.INDEX + ap.img['src']
+ for item in soup.findAll('a', href=True):
+ if item['href'].startswith('/tapas/TAPA_CLA'):
+ cover_url = self.INDEX + item['href']
+ return cover_url
return cover_url
diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe
index e050a123a9..b53a22b648 100644
--- a/recipes/daily_mirror.recipe
+++ b/recipes/daily_mirror.recipe
@@ -7,7 +7,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
description = 'News as provided by The Daily Mirror -UK'
__author__ = 'Dave Asbury'
- # last updated 28/4/12
+ # last updated 8/6/12
language = 'en_GB'
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@@ -28,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
dict(name='div',attrs={'class' : 'lead-text'}),
dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
- dict(name='figure',attrs={'class' : 'clearfix'}),
+ # dict(name='figure',attrs={'class' : 'clearfix'}),
dict(name='div',attrs={'class' :'body '}),
#dict(attrs={'class' : ['article-attr','byline append-1','published']}),
@@ -37,6 +37,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
remove_tags = [
+ dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}),
dict(attrs={'class' : 'comment'}),
dict(name='title'),
dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
@@ -89,6 +90,3 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
#cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url
-
-
-
diff --git a/recipes/drytooling_pl.recipe b/recipes/drytooling_pl.recipe
new file mode 100644
index 0000000000..bb05e1a25f
--- /dev/null
+++ b/recipes/drytooling_pl.recipe
@@ -0,0 +1,31 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class BasicUserRecipe1337668045(BasicNewsRecipe):
+    # Recipe for the RSS feeds published by drytooling.com.pl.
+    title = u'Drytooling.com.pl'
+    masthead_url = 'http://drytooling.com.pl/images/drytooling-kindle.png'
+    cover_url = 'http://drytooling.com.pl/images/drytooling-kindle.png'
+    description = u'Drytooling.com.pl jest serwisem wspinaczki zimowej, alpinizmu i himalaizmu. Jeśli uwielbiasz zimę, nie możesz doczekać się aż wyciągniesz szpej z szafki i uderzysz w Tatry, Alpy, czy może Himalaje, to znajdziesz tutaj naprawdę dużo interesujących Cię treści! Zapraszamy!'
+    __author__ = u'Damian Granowski'
+    # The description and several feed names are Polish, so declare the
+    # language explicitly instead of inheriting the BasicNewsRecipe default.
+    language = 'pl'
+    oldest_article = 100
+    max_articles_per_feed = 20
+    auto_cleanup = True
+
+    # (feed title, feed URL) pairs; the URLs differ only in their feed_id.
+    feeds = [
+        (u'Newsy', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=4&format=raw'),
+        (u'Artyku\u0142y', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=3&format=raw'),
+        (u'Imprezy i zawody', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=5&format=raw'),
+        (u'Baza G\xf3rska', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=6&format=raw'),
+        (u'Wyprawy', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=7&format=raw'),
+        (u'Newsy / alpinizm', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=12&format=raw'),
+        (u'Newsy / klasyka zimowa', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=11&format=raw'),
+        (u'Newsy / himalaizm', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=10&format=raw'),
+        (u'Outdoor', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=8&format=raw'),
+    ]
diff --git a/recipes/economico.recipe b/recipes/economico.recipe
new file mode 100644
index 0000000000..86a1e15975
--- /dev/null
+++ b/recipes/economico.recipe
@@ -0,0 +1,24 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Economico(BasicNewsRecipe):
+    # Recipe for the RSS feeds of economico.sapo.pt (language code 'pt').
+    title = u'Economico'
+    language = 'pt'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 1  # days
+    max_articles_per_feed = 25
+    encoding = 'utf-8'
+    use_embedded_content = False
+    no_stylesheets = True
+    auto_cleanup = True
+
+    # (section title, feed URL) pairs
+    feeds = [
+        ('Ultima Hora', 'http://economico.sapo.pt/rss/ultimas'),
+        ('Em Foco', 'http://economico.sapo.pt/rss/emfoco'),
+        ('Mercados', 'http://economico.sapo.pt/rss/mercados'),
+        ('Empresas', 'http://economico.sapo.pt/rss/empresas'),
+        ('Economia', 'http://economico.sapo.pt/rss/economia'),
+        ('Politica', 'http://economico.sapo.pt/rss/politica'),
+    ]
diff --git a/recipes/el_mundo_today.recipe b/recipes/el_mundo_today.recipe
index 77a9f331a0..7f558d10e7 100644
--- a/recipes/el_mundo_today.recipe
+++ b/recipes/el_mundo_today.recipe
@@ -1,3 +1,4 @@
+import re
from calibre.web.feeds.news import BasicNewsRecipe
class ElMundoTodayRecipe(BasicNewsRecipe):
@@ -7,11 +8,32 @@ class ElMundoTodayRecipe(BasicNewsRecipe):
category = 'Noticias, humor'
cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png'
oldest_article = 30
- max_articles_per_feed = 30
- auto_cleanup = True
+ max_articles_per_feed = 60
+ auto_cleanup = False
no_stylesheets = True
+ remove_javascript = True
language = 'es'
- use_embedded_content = True
+ use_embedded_content = False
+
+ preprocess_regexps = [
+ (re.compile(r'.*', re.DOTALL),
+ lambda match: ''),
+ #(re.compile(r'^\t{5}$'), lambda match: ''),
+ #(re.compile(r'\t{5}$'), lambda match: ''),
+ (re.compile(r'', re.DOTALL),
+ lambda match: ''),
+ ]
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'post-wrapper'})
+ ]
+
+ remove_attributes = [ 'href', 'title', 'alt' ]
+
+ extra_css = '''
+ .antetitulo{font-variant:small-caps; font-weight:bold} .articleinfo{font-size:small}
+ img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
+ '''
feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]
diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe
index 55858020ad..34871ea04a 100644
--- a/recipes/elektroda_pl.recipe
+++ b/recipes/elektroda_pl.recipe
@@ -10,6 +10,7 @@ class Elektroda(BasicNewsRecipe):
category = 'electronics'
language = 'pl'
max_articles_per_feed = 100
+ no_stylesheets= True
remove_tags_before=dict(name='span', attrs={'class':'postbody'})
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
remove_tags=[dict(name='a', attrs={'href':'#top'})]
diff --git a/recipes/elpais_impreso.recipe b/recipes/elpais_impreso.recipe
index b22a41dcec..ffa1033477 100644
--- a/recipes/elpais_impreso.recipe
+++ b/recipes/elpais_impreso.recipe
@@ -1,5 +1,6 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic '
+__copyright__ = '2010-2012, Darko Miletic '
'''
www.elpais.com
'''
@@ -7,23 +8,24 @@ www.elpais.com
from calibre.web.feeds.news import BasicNewsRecipe
class ElPais_RSS(BasicNewsRecipe):
- title = 'El Pais'
+ title = u'El País'
__author__ = 'Darko Miletic'
- description = 'el periodico global en Castellano'
+ description = u'Noticias de última hora sobre la actualidad en España y el mundo: política, economía, deportes, cultura, sociedad, tecnología, gente, opinión, viajes, moda, televisión, los blogs y las firmas de EL PAÍS. Además especiales, vídeos, fotos, audios, gráficos, entrevistas, promociones y todos los servicios de EL PAÍS.'
publisher = 'EDICIONES EL PAIS, S.L.'
category = 'news, politics, finances, world, spain'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
- encoding = 'cp1252'
+ encoding = 'utf8'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
publication_type = 'newspaper'
- masthead_url = 'http://www.elpais.com/im/tit_logo.gif'
+ masthead_url = 'http://ep01.epimg.net/iconos/v1.x/v1.0/logos/cabecera_portada.png'
extra_css = """
- body{font-family: Georgia,"Times New Roman",Times,serif }
- h3{font-family: Arial,Helvetica,sans-serif}
+ h1{font-family: Georgia,"Times New Roman",Times,serif }
+ #subtitulo_noticia, .firma, .figcaption{font-size: small}
+ body{font-family: Arial,Helvetica,Garuda,sans-serif}
img{margin-bottom: 0.4em; display:block}
"""
@@ -34,49 +36,61 @@ class ElPais_RSS(BasicNewsRecipe):
, 'language' : language
}
- keep_only_tags = [dict(attrs={'class':['cabecera_noticia estirar','cabecera_noticia','','contenido_noticia']})]
- remove_tags = [
- dict(name=['meta','link','base','iframe','embed','object'])
- ,dict(attrs={'class':['info_complementa','estructura_2col_der','votos estirar','votos']})
- ,dict(attrs={'id':'utilidades'})
+ keep_only_tags = [
+ dict(attrs={'id':['titulo_noticia','subtitulo_noticia']})
+ ,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']})
+ ]
+ remove_tags = [
+ dict(name=['meta','link','base','iframe','embed','object'])
+ ,dict(attrs={'class':'disposicion_vertical'})
]
- remove_tags_after = dict(attrs={'id':'utilidades'})
- remove_attributes = ['lang','border','width','height']
feeds = [
- (u'Lo ultimo' , u'http://www.elpais.com/rss/feed.html?feedId=17046')
- ,(u'America Latina' , u'http://www.elpais.com/rss/feed.html?feedId=17041')
- ,(u'Mexico' , u'http://www.elpais.com/rss/feed.html?feedId=17042')
- ,(u'Europa' , u'http://www.elpais.com/rss/feed.html?feedId=17043')
- ,(u'Estados Unidos' , u'http://www.elpais.com/rss/feed.html?feedId=17044')
- ,(u'Oriente proximo' , u'http://www.elpais.com/rss/feed.html?feedId=17045')
- ,(u'Espana' , u'http://www.elpais.com/rss/feed.html?feedId=1002' )
- ,(u'Andalucia' , u'http://www.elpais.com/rss/feed.html?feedId=17057')
- ,(u'Catalunia' , u'http://www.elpais.com/rss/feed.html?feedId=17059')
- ,(u'Comunidad Valenciana' , u'http://www.elpais.com/rss/feed.html?feedId=17061')
- ,(u'Madrid' , u'http://www.elpais.com/rss/feed.html?feedId=1016' )
- ,(u'Pais Vasco' , u'http://www.elpais.com/rss/feed.html?feedId=17062')
- ,(u'Galicia' , u'http://www.elpais.com/rss/feed.html?feedId=17063')
- ,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' )
- ,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' )
- ,(u'Deportes' , u'http://www.elpais.com/rss/feed.html?feedId=1007' )
- ,(u'Cultura' , u'http://www.elpais.com/rss/feed.html?feedId=1008' )
- ,(u'Cine' , u'http://www.elpais.com/rss/feed.html?feedId=17052')
- ,(u'Literatura' , u'http://www.elpais.com/rss/feed.html?feedId=17053')
- ,(u'Musica' , u'http://www.elpais.com/rss/feed.html?feedId=17051')
- ,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060')
- ,(u'Tecnologia' , u'http://www.elpais.com/rss/feed.html?feedId=1005' )
- ,(u'Economia' , u'http://www.elpais.com/rss/feed.html?feedId=1006' )
- ,(u'Ciencia' , u'http://www.elpais.com/rss/feed.html?feedId=17068')
- ,(u'Salud' , u'http://www.elpais.com/rss/feed.html?feedId=17074')
- ,(u'Ocio' , u'http://www.elpais.com/rss/feed.html?feedId=17075')
- ,(u'Justicia y Leyes' , u'http://www.elpais.com/rss/feed.html?feedId=17069')
- ,(u'Guerras y conflictos' , u'http://www.elpais.com/rss/feed.html?feedId=17070')
- ,(u'Politica' , u'http://www.elpais.com/rss/feed.html?feedId=17073')
+ (u'Lo ultimo' , u'http://ep00.epimg.net/rss/tags/ultimas_noticias.xml')
+ ,(u'America Latina' , u'http://elpais.com/tag/rss/latinoamerica/a/' )
+ ,(u'Mexico' , u'http://elpais.com/tag/rss/mexico/a/' )
+ ,(u'Europa' , u'http://elpais.com/tag/rss/europa/a/' )
+ ,(u'Estados Unidos' , u'http://elpais.com/tag/rss/estados_unidos/a/' )
+ ,(u'Oriente proximo' , u'http://elpais.com/tag/rss/oriente_proximo/a/' )
+ ,(u'Andalucia' , u'http://ep00.epimg.net/rss/ccaa/andalucia.xml' )
+ ,(u'Catalunia' , u'http://ep00.epimg.net/rss/ccaa/catalunya.xml' )
+ ,(u'Comunidad Valenciana' , u'http://ep00.epimg.net/rss/ccaa/valencia.xml' )
+ ,(u'Madrid' , u'http://ep00.epimg.net/rss/ccaa/madrid.xml' )
+ ,(u'Pais Vasco' , u'http://ep00.epimg.net/rss/ccaa/paisvasco.xml' )
+ ,(u'Galicia' , u'http://ep00.epimg.net/rss/ccaa/galicia.xml' )
+ ,(u'Sociedad' , u'http://ep00.epimg.net/rss/sociedad/portada.xml' )
+ ,(u'Deportes' , u'http://ep00.epimg.net/rss/deportes/portada.xml' )
+ ,(u'Cultura' , u'http://ep00.epimg.net/rss/cultura/portada.xml' )
+ ,(u'Cine' , u'http://elpais.com/tag/rss/cine/a/' )
+ ,(u'Economía' , u'http://elpais.com/tag/rss/economia/a/' )
+ ,(u'Literatura' , u'http://elpais.com/tag/rss/libros/a/' )
+ ,(u'Musica' , u'http://elpais.com/tag/rss/musica/a/' )
+ ,(u'Arte' , u'http://elpais.com/tag/rss/arte/a/' )
+ ,(u'Medio Ambiente' , u'http://elpais.com/tag/rss/medio_ambiente/a/' )
+ ,(u'Tecnologia' , u'http://ep01.epimg.net/rss/tecnologia/portada.xml' )
+ ,(u'Ciencia' , u'http://ep00.epimg.net/rss/tags/c_ciencia.xml' )
+ ,(u'Salud' , u'http://elpais.com/tag/rss/salud/a/' )
+ ,(u'Ocio' , u'http://elpais.com/tag/rss/ocio/a/' )
+ ,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' )
+ ,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' )
+ ,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' )
+ ,(u'Opinion' , u'http://ep01.epimg.net/rss/politica/opinion.xml' )
]
- def print_version(self, url):
- return url + '?print=1'
+    def get_article_url(self, article):
+        # Resolve the article link the default way, then reject missing URLs
+        # and URLs containing '/album/' or '/futbol/partido/' (presumably
+        # photo galleries and match pages rather than regular articles).
+        url = BasicNewsRecipe.get_article_url(self, article)
+        skip = not url or '/album/' in url or '/futbol/partido/' in url
+        if skip:
+            self.log('Skipping non-article', url)
+            return None
+        return url
+
+    def get_cover_url(self):
+        # Scan the elpais.com front page for an image whose src ends with
+        # 'elpaisTodayMiddle.jpg' and return that URL with the file name
+        # swapped for 'elpaisToday.jpg'. Returns None when no such image
+        # is found.
+        soup = self.index_to_soup('http://elpais.com/')
+        # src=True skips <img> tags that have no src attribute; indexing
+        # image['src'] on such a tag would raise KeyError.
+        for image in soup.findAll('img', src=True):
+            src = image['src']
+            if src.endswith('elpaisTodayMiddle.jpg'):
+                return src.replace('elpaisTodayMiddle.jpg', 'elpaisToday.jpg')
+        return None
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
diff --git a/recipes/gameplay_pl.recipe b/recipes/gameplay_pl.recipe
index 7b0ccb4f55..dc90d79ed1 100644
--- a/recipes/gameplay_pl.recipe
+++ b/recipes/gameplay_pl.recipe
@@ -12,8 +12,8 @@ class Gameplay_pl(BasicNewsRecipe):
max_articles_per_feed = 100
remove_javascript= True
no_stylesheets= True
- keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
- remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
+ keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news', 'news_container']})]
+ remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi', 'news_tagi']}), dict(attrs={'usemap':'#map'}), dict(name='a', attrs={'class':['pin-it-button', 'twitter-share-button']})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url):
diff --git a/recipes/good_house_keeping.recipe b/recipes/good_house_keeping.recipe
index 1dc26e88e7..4501c1122a 100644
--- a/recipes/good_house_keeping.recipe
+++ b/recipes/good_house_keeping.recipe
@@ -8,12 +8,17 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
+ #auto_cleanup = True
remove_javascript = True
def print_version(self,url):
- segments = url.split('/')
- printURL = '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:])
- return printURL
+ if '/tips-for-making-desserts?' in url:
+ return None
+ segments = url.split('/')
+ segments[-1] = segments[-1].split('?')[0]
+ segments[-1] +='?page=all'
+ printURL = '/'.join(segments[0:3]) + '/print-this/' + segments[-1]
+ return printURL
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
@@ -22,10 +27,19 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
alink.replaceWith(tstr)
return soup
- feeds = [ (u'Recipes & Entertaining', u'http://www.goodhousekeeping.com/food/food-rss/?src=rss'),
- (u'Home & House', u'http://www.goodhousekeeping.com/home/home-rss/?src=rss'),
- (u'Diet & Health', u'http://www.goodhousekeeping.com/health/health-rss/?src=rss'),
- (u'Beauty & Style', u'http://www.goodhousekeeping.com/beauty/beauty-rss/?src=rss'),
- (u'Family & Pets', u'http://www.goodhousekeeping.com/family/family-rss/?src=rss'),
- (u'Saving Money', u'http://www.goodhousekeeping.com/money/money-rss/?src=rss'),
- ]
+
+ #feeds = [
+#(u'Food and Recipes', u'http://www.goodhousekeeping.com/rss/recipes/'),
+#]
+
+
+ feeds = [
+(u'Food and Recipes', u'http://www.goodhousekeeping.com/rss/recipes/'),
+(u'Home and Organizing', u'http://www.goodhousekeeping.com/rss/home/'),
+(u'Diet and Health', u'http://www.goodhousekeeping.com/rss/health/'),
+(u'Beauty and Anti-Aging', u'http://www.goodhousekeeping.com/rss/beauty/'),
+(u'Family and Relationships', u'http://www.goodhousekeeping.com/rss/family/'),
+(u'Holidays', u'http://www.goodhousekeeping.com/rss/holidays/'),
+(u'In the Test Kitchen', 'http://www.goodhousekeeping.com/rss/test-kitchen-blog/'),
+]
+
diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe
index 6a99411244..59c8fc2f26 100644
--- a/recipes/gosc_niedzielny.recipe
+++ b/recipes/gosc_niedzielny.recipe
@@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
+from datetime import date
import re
class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek'
+ title = u'Gość niedzielny'
description = 'Weekly magazine'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
temp_files = []
- simultaneous_downloads = 1
- masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
- title = u'Gość niedzielny'
articles_are_obfuscated = True
@@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
self.temp_files[-1].close()
return self.temp_files[-1].name
- def find_last_issue(self):
- soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
- #szukam zdjęcia i linka do porzedniego pełnego numeru
+ def find_last_issue(self, year):
+ soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
+
+ #szukam zdjęcia i linka do poprzedniego pełnego numeru
first = True
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
img = d.find('img')
if img != None:
a = img.parent
self.EDITION = a['href']
+ self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
- if not first:
+ if year != date.today().year or not first:
break
first = False
def parse_index(self):
- self.find_last_issue()
+ year = date.today().year
+ self.find_last_issue(year)
+ ##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
+ if self.EDITION == 0:
+ self.find_last_issue(year-1)
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
feeds = []
#wstepniak
diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe
index 1f8147ba3d..36982788f1 100644
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@@ -12,13 +12,16 @@ class Gram_pl(BasicNewsRecipe):
no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
- remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
- keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
+ remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
+ keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')]
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
- (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
+ (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
+ (u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
+ #(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss')
+ ]
- def parse_feeds (self):
- feeds = BasicNewsRecipe.parse_feeds(self)
+ def parse_feeds (self):
+ feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
@@ -56,4 +59,4 @@ class Gram_pl(BasicNewsRecipe):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
- return soup
\ No newline at end of file
+ return soup
diff --git a/recipes/greenlinux_pl.recipe b/recipes/greenlinux_pl.recipe
deleted file mode 100644
index 3c5a3c8f20..0000000000
--- a/recipes/greenlinux_pl.recipe
+++ /dev/null
@@ -1,13 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class GreenLinux(BasicNewsRecipe):
- title = u'GreenLinux.pl'
- __author__ = 'fenuks'
- category = 'IT'
- language = 'pl'
- cover_url = 'http://lh5.ggpht.com/_xd_6Y9kXhEc/S8tjyqlfhfI/AAAAAAAAAYU/zFNTp07ZQko/top.png'
- oldest_article = 15
- max_articles_per_feed = 100
- auto_cleanup = True
-
- feeds = [(u'Newsy', u'http://feeds.feedburner.com/greenlinux')]
diff --git a/recipes/grid_to.recipe b/recipes/grid_to.recipe
index bd3146082e..a066219b24 100644
--- a/recipes/grid_to.recipe
+++ b/recipes/grid_to.recipe
@@ -1,8 +1,8 @@
from calibre.web.feeds.news import BasicNewsRecipe
-class TheGridTO(BasicNewsRecipe):
+class TheGrid(BasicNewsRecipe):
#: The title to use for the ebook
- title = u'The Grid TO'
+ title = u'The Grid'
#: A couple of lines that describe the content this recipe downloads.
#: This will be used primarily in a GUI that presents a list of recipes.
@@ -67,9 +67,7 @@ class TheGridTO(BasicNewsRecipe):
div = soup.find(attrs={'class': section_class})
articles = []
- for tag in div.findAllNext(attrs={'class':'search-block'}):
- a = tag.findAll('a', href=True)[1]
-
+ for a in div.findAll(attrs={'class':'post-title'}):
title = self.tag_to_string(a)
url = a['href']
diff --git a/recipes/haaretz_en.recipe b/recipes/haaretz_en.recipe
index 4404624aff..ade32ae5ea 100644
--- a/recipes/haaretz_en.recipe
+++ b/recipes/haaretz_en.recipe
@@ -1,16 +1,15 @@
__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic '
+__copyright__ = '2010-2012, Darko Miletic '
'''
www.haaretz.com
'''
import re
-from calibre import strftime
-from time import gmtime
+import urllib
from calibre.web.feeds.news import BasicNewsRecipe
-class HaaretzPrint_en(BasicNewsRecipe):
- title = 'Haaretz - print edition'
+class Haaretz_en(BasicNewsRecipe):
+ title = 'Haaretz'
__author__ = 'Darko Miletic'
description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
publisher = 'Haaretz'
@@ -21,10 +20,16 @@ class HaaretzPrint_en(BasicNewsRecipe):
encoding = 'utf8'
use_embedded_content = False
language = 'en_IL'
+ needs_subscription = True
+ remove_empty_feeds = True
publication_type = 'newspaper'
PREFIX = 'http://www.haaretz.com'
- masthead_url = PREFIX + '/images/logos/logoGrey.gif'
- extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '
+ masthead_url = PREFIX + '/images/logos/HaaretzLogo.gif'
+ extra_css = """
+ body{font-family: Verdana,Arial,Helvetica,sans-serif }
+ h1, .articleBody {font-family: Georgia, serif}
+ .authorBar {font-size: small}
+ """
preprocess_regexps = [(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '