Add BabyOnline.ro recipe by Silviu Cotoara and update the ecuisine.ro, egirl.ro and tabu.ro recipes

This commit is contained in:
Kovid Goyal 2011-04-21 09:39:41 -06:00
parent e910505e4e
commit deb05bae33
5 changed files with 68 additions and 7 deletions

59
recipes/babyonline.recipe Normal file
View File

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
babyonline.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class BabyOnline(BasicNewsRecipe):
    """Calibre news recipe for babyonline.ro.

    The recipe reads a single RSS feed and, for every article page, keeps
    only the ``article_container`` div, then strips the navigation,
    sharing and comment widgets listed in ``remove_tags``.
    """

    # --- Descriptive metadata -------------------------------------------
    title = u'Baby Online'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'De la p\u0103rinte la p\u0103rinte'
    publisher = u'Baby Online'
    category = 'Ziare,Reviste,Copii,Mame'
    language = 'ro'
    cover_url = 'http://www.babyonline.ro/images/default/logo.gif'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 50
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Reuse the metadata declared above so the two never drift apart.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up rules --------------------------------------------
    # Only the article container is retained from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'article_container'}},
    ]

    # Elements dropped from the retained content.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'bar_nav'}},
        {'name': 'div', 'attrs': {'id': 'service_send'}},
        {'name': 'div', 'attrs': {'id': 'other_videos'}},
        {'name': 'div', 'attrs': {'class': 'dot_line_yellow'}},
        {'name': 'a', 'attrs': {'class': 'print'}},
        {'name': 'a', 'attrs': {'class': 'email'}},
        {'name': 'a', 'attrs': {'class': 'YM'}},
        {'name': 'a', 'attrs': {'class': 'comment'}},
        {'name': 'div', 'attrs': {'class': 'tombstone_cross'}},
        {'name': 'span', 'attrs': {'class': 'liketext'}},
    ]

    # Everything following the "send" service box is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'id': 'service_send'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.babyonline.ro/rss_homepage.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``."""
        processed = self.adeify_images(soup)
        return processed

View File

@ -14,14 +14,14 @@ class EcuisineRo(BasicNewsRecipe):
__author__ = u'Silviu Cotoar\u0103'
description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti'
publisher = 'eCuisine'
oldest_article = 5
oldest_article = 50
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Retete,Bucatarie'
encoding = 'utf-8'
cover_url = ''
cover_url = 'http://www.ecuisine.ro/sites/all/themes/ecuisine/images/logo.gif'
conversion_options = {
'comments' : description
@ -31,8 +31,8 @@ class EcuisineRo(BasicNewsRecipe):
}
keep_only_tags = [
dict(name='div', attrs={'class':'page-title'})
, dict(name='div', attrs={'class':'content clearfix'})
dict(name='h1', attrs={'id':'page-title'})
, dict(name='div', attrs={'class':'field-item even'})
]
remove_tags = [

View File

@ -31,8 +31,8 @@ class EgirlRo(BasicNewsRecipe):
}
keep_only_tags = [
dict(name='div', attrs={'id':'title_art'})
, dict(name='div', attrs={'class':'content_style'})
dict(name='div', attrs={'id':'content_art'})
, dict(name='div', attrs={'class':'content_articol'})
]
feeds = [

Binary file not shown.

After

Width:  |  Height:  |  Size: 256 B

View File

@ -37,10 +37,12 @@ class TabuRo(BasicNewsRecipe):
]
remove_tags = [
dict(name='div', attrs={'class':'asemanatoare'})
dict(name='div', attrs={'class':'asemanatoare'}),
dict(name='div', attrs={'class':'social'})
]
remove_tags_after = [
dict(name='div', attrs={'class':'social'}),
dict(name='div', attrs={'id':'comments'}),
dict(name='div', attrs={'class':'asemanatoare'})
]