diff --git a/resources/recipes/revista_muy.recipe b/resources/recipes/revista_muy.recipe
index ae3d47466c..e452a6f053 100644
--- a/resources/recipes/revista_muy.recipe
+++ b/resources/recipes/revista_muy.recipe
@@ -1,3 +1,4 @@
+from calibre.web.feeds.news import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from BeautifulSoup import Tag
@@ -10,26 +11,31 @@ class RevistaMuyInteresante(BasicNewsRecipe):
language = 'es'
no_stylesheets = True
- remove_attributes = ['style', 'font']
+ remove_javascript = True
+
+ extra_css = ' .txt_articulo{ font-family: sans-serif; font-size: medium; text-align: justify } .contentheading{font-family: serif; font-size: large; font-weight: bold; color: #000000; text-align: center}'
- #then we add our own style(s) like this:
- extra_css = '''
- .contentheading{font-weight: bold}
- p {font-size: 4px;font-family: Times New Roman;}
- '''
def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+
for img_tag in soup.findAll('img'):
- parent_tag = img_tag.parent
- if parent_tag.name == 'td':
- if not parent_tag.get('class') == 'txt_articulo': break
- imagen = img_tag
- new_tag = Tag(soup,'p')
- img_tag.replaceWith(new_tag)
- div = soup.find(attrs={'class':'article_category'})
- div.insert(0,imagen)
+ imagen = img_tag
+ new_tag = Tag(soup,'p')
+ img_tag.replaceWith(new_tag)
+ div = soup.find(attrs={'class':'article_category'})
+ div.insert(0,imagen)
+ break
return soup
+
+ preprocess_regexps = [
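+ (re.compile(r'<td class="contentheading" width="100%">.*?</td>', re.DOTALL|re.IGNORECASE), lambda match: '<td class="contentheading">' + match.group().replace('<td class="contentheading" width="100%">','').strip().replace('</td>','').strip() + '</td>'),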
+
+ ]
+
+
keep_only_tags = [dict(name='div', attrs={'class':['article']}),dict(name='td', attrs={'class':['txt_articulo']})]
remove_tags = [
@@ -37,6 +43,7 @@ class RevistaMuyInteresante(BasicNewsRecipe):
,dict(name='div', attrs={'id':['comment']})
,dict(name='td', attrs={'class':['buttonheading']})
,dict(name='div', attrs={'class':['tags_articles']})
+ ,dict(name='table', attrs={'class':['pagenav']})
]
remove_tags_after = dict(name='div', attrs={'class':'tags_articles'})
@@ -71,8 +78,33 @@ class RevistaMuyInteresante(BasicNewsRecipe):
for title, url in [
('Historia',
'http://www.muyinteresante.es/historia-articulos'),
+ ('Ciencia',
+ 'http://www.muyinteresante.es/ciencia-articulos'),
+ ('Naturaleza',
+ 'http://www.muyinteresante.es/naturaleza-articulos'),
+ ('Tecnología',
+ 'http://www.muyinteresante.es/tecnologia-articulos'),
+ ('Salud',
+ 'http://www.muyinteresante.es/salud-articulos'),
+ ('Más Muy',
+ 'http://www.muyinteresante.es/muy'),
+ ('Innova - Automoción',
+ 'http://www.muyinteresante.es/articulos-innovacion-autos'),
+ ('Innova - Salud',
+ 'http://www.muyinteresante.es/articulos-innovacion-salud'),
+ ('Innova - Medio Ambiente',
+ 'http://www.muyinteresante.es/articulos-innovacion-medio-ambiente'),
+ ('Innova - Alimentación',
+ 'http://www.muyinteresante.es/articulos-innovacion-alimentacion'),
+ ('Innova - Sociedad',
+ 'http://www.muyinteresante.es/articulos-innovacion-sociedad'),
+ ('Innova - Tecnología',
+ 'http://www.muyinteresante.es/articulos-innovacion-tecnologia'),
+ ('Innova - Ocio',
+ 'http://www.muyinteresante.es/articulos-innovacion-ocio'),
]:
articles = self.nz_parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
+