Various Romanian news sources by Silviu Cotoara

This commit is contained in:
Kovid Goyal 2011-02-15 09:14:28 -07:00
parent f30fa82cf9
commit 040be5fe03
12 changed files with 585 additions and 0 deletions

View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
adevarul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Adevarul(BasicNewsRecipe):
    """Recipe settings for adevarul.ro, expressed as class attributes."""

    # Publication metadata
    title = u'Adev\u0103rul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Adevarul'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_header'}),
        dict(name='div', attrs={'class': 'bd'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'bb-wg-article_related_attachements'}),
        dict(name='div', attrs={'class': 'bb-md bb-md-article_comments'}),
        dict(name='form', attrs={'id': 'bb-comment-create-form'}),
    ]
    remove_tags_after = [
        dict(name='form', attrs={'id': 'bb-comment-create-form'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Capital(BasicNewsRecipe):
    """Recipe settings for capital.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Capital'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Capital'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='div', attrs={'class': 'single one_article'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'single_details'}),
        dict(name='div', attrs={'class': 'tx-addoceansbanners-pi1'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://www.capital.ro/rss.html'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencu.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencu(BasicNewsRecipe):
    """Recipe settings for catavencu.ro, expressed as class attributes."""

    # Publication metadata
    title = u'Academia Ca\u0163avencu'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Tagma cum laude'
    publisher = 'Catavencu'
    category = 'Ziare'
    language = 'ro'
    cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='ul', attrs={'class': 'articles'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['tools']}),
        dict(name='div', attrs={'class': ['share']}),
        dict(name='div', attrs={'class': ['category']}),
        dict(name='div', attrs={'id': ['comments']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': 'comments'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Feeds', u'http://catavencu.ro/feed/rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gandul(BasicNewsRecipe):
    """Recipe settings for gandul.info, expressed as class attributes."""

    # Publication metadata
    title = u'G\u00E2ndul'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Cotidian Online'
    publisher = 'Gandul'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article'}),
    ]
    remove_tags = [
        dict(name='a', attrs={'class': 'photo'}),
        dict(name='div', attrs={'class': 'ad'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hotnews(BasicNewsRecipe):
    """Recipe settings for hotnews.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Hotnews'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Hotnews'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://www.hotnews.ro/images/new/logo.gif'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors naming the parts of each article page to keep
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'title'}),
        dict(name='div', attrs={'id': 'articleContent'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate'),
        (u'English', u'http://www.hotnews.ro/rss/english'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
jurnalul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JurnalulNational(BasicNewsRecipe):
    """Recipe settings for jurnalul.ro, expressed as class attributes."""

    # Publication metadata
    title = u'Jurnalul Na\u0163ional'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Jurnalul National'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://www.jurnalul.ro/images/sigla.png'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors naming the parts of each article page to keep
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'h3 art_title'}),
        dict(name='div', attrs={'class': 'only_text'}),
    ]

    # (section title, feed URL) pairs, one per site section
    feeds = [
        (u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html'),
        (u'Special', u'http://www.jurnalul.ro/rss/special-3001.html'),
        (u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html'),
        (u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html'),
        (u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html'),
        (u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html'),
        (u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html'),
        (u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html'),
        (u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mediafax.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mediafax(BasicNewsRecipe):
    """Recipe settings for mediafax.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Mediafax'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Mediafax'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='div', attrs={'class': 'news tabs-container'}),
    ]
    remove_tags = [
        dict(name='ul', attrs={'class': ['CategoryNews']}),
        dict(name='div', attrs={'class': ['read']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'cmsItemViews'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Actualitate', u'http://www.mediafax.ro/rss/'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyRo(BasicNewsRecipe):
    """Recipe settings for money.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Money Ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'MoneyRo'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://assets.moneyweb.ro/images/logo_money.jpg'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='div', attrs={'id': 'titluArticol'}),
        dict(name='img', attrs={'id': 'objImage'}),
        dict(name='div', attrs={'class': 'leftColumnArticle'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': 'articleTags'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': 'ads'}),
        dict(name='div', attrs={'id': 'aus'}),
        dict(name='div', attrs={'id': 'bb-comment-create-form'}),
        dict(name='div', attrs={'id': 'articleTags'}),
        dict(name='div', attrs={'class': 'breadcrumb'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
prosport.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Prosport(BasicNewsRecipe):
    """Recipe settings for prosport.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Prosport'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri Sportive din Rom\u00e2nia'
    publisher = 'Prosport'
    category = 'Ziare,Stiri,Romania,Sport'
    language = 'ro'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'a-title'}),
        dict(name='div', attrs={'class': 'a-entry'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'utils'}),
        dict(name='div', attrs={'class': 'g-slide'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://www.prosport.ro/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Realitatea(BasicNewsRecipe):
    """Recipe settings for realitatea.net, expressed as class attributes."""

    # Publication metadata
    title = 'Realitatea'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Rom\u00e2nia'
    publisher = 'Realitatea'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://assets.realitatea.ro/images/logo.jpg'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors used to keep or strip parts of each article page.
    # NOTE(review): the trailing space in 'articleTitle ' is reproduced
    # verbatim; presumably it mirrors the site's markup -- confirm before
    # normalizing it.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'articleTitle '}),
        dict(name='div', attrs={'class': 'articleBody'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': 'aus'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
standard.money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class StandardMoneyRo(BasicNewsRecipe):
    """Recipe settings for standard.money.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Standard Money Ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Portal de Business'
    publisher = 'Standard Money'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors naming the parts of each article page to keep
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'post-title'}),
        dict(name='div', attrs={'class': 'content_post'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'Actualitate', u'http://standard.money.ro/feed'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
zf.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiarulFinanciar(BasicNewsRecipe):
    """Recipe settings for zf.ro, expressed as class attributes."""

    # Publication metadata
    title = 'Ziarul Financiar'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u0218tiri din Business'
    publisher = 'Ziarul Financiar'
    category = 'Ziare,Stiri,Romania'
    language = 'ro'
    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'

    # Download and parsing settings
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Conversion metadata; reuses the values declared above
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag selectors naming the parts of each article page to keep
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article'}),
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted