diff --git a/resources/images/news/7seri.png b/resources/images/news/7seri.png new file mode 100644 index 0000000000..dbb805cbb2 Binary files /dev/null and b/resources/images/news/7seri.png differ diff --git a/resources/images/news/adevarul.png b/resources/images/news/adevarul.png new file mode 100644 index 0000000000..03be084a13 Binary files /dev/null and b/resources/images/news/adevarul.png differ diff --git a/resources/images/news/aventurilapescuit.png b/resources/images/news/aventurilapescuit.png new file mode 100644 index 0000000000..f81dde2446 Binary files /dev/null and b/resources/images/news/aventurilapescuit.png differ diff --git a/resources/images/news/capital.png b/resources/images/news/capital.png new file mode 100644 index 0000000000..e5a4792cda Binary files /dev/null and b/resources/images/news/capital.png differ diff --git a/resources/images/news/catavencu.png b/resources/images/news/catavencu.png new file mode 100644 index 0000000000..4590bcc638 Binary files /dev/null and b/resources/images/news/catavencu.png differ diff --git a/resources/images/news/chipro.png b/resources/images/news/chipro.png new file mode 100644 index 0000000000..1a3091208f Binary files /dev/null and b/resources/images/news/chipro.png differ diff --git a/resources/images/news/csid.png b/resources/images/news/csid.png new file mode 100644 index 0000000000..124a0dd8d2 Binary files /dev/null and b/resources/images/news/csid.png differ diff --git a/resources/images/news/curierulnational.png b/resources/images/news/curierulnational.png new file mode 100644 index 0000000000..812754af75 Binary files /dev/null and b/resources/images/news/curierulnational.png differ diff --git a/resources/images/news/descopera.png b/resources/images/news/descopera.png new file mode 100644 index 0000000000..44a05688dc Binary files /dev/null and b/resources/images/news/descopera.png differ diff --git a/resources/images/news/ecuisine.png b/resources/images/news/ecuisine.png new file mode 100644 index 0000000000..0f4c5ad190 Binary files /dev/null and b/resources/images/news/ecuisine.png differ diff --git a/resources/images/news/egirl.png b/resources/images/news/egirl.png new file mode 100644 index 0000000000..d697e45e1b Binary files /dev/null and b/resources/images/news/egirl.png differ diff --git a/resources/images/news/fhmro.png b/resources/images/news/fhmro.png new file mode 100644 index 0000000000..4eaef06dfa Binary files /dev/null and b/resources/images/news/fhmro.png differ diff --git a/resources/images/news/gandul.png b/resources/images/news/gandul.png new file mode 100644 index 0000000000..37909c7ccd Binary files /dev/null and b/resources/images/news/gandul.png differ diff --git a/resources/images/news/go4it.png b/resources/images/news/go4it.png new file mode 100644 index 0000000000..6bf21ddc94 Binary files /dev/null and b/resources/images/news/go4it.png differ diff --git a/resources/images/news/gsp.png b/resources/images/news/gsp.png new file mode 100644 index 0000000000..de1ff91fc7 Binary files /dev/null and b/resources/images/news/gsp.png differ diff --git a/resources/images/news/hotcity.png b/resources/images/news/hotcity.png new file mode 100644 index 0000000000..8e3f97e0c2 Binary files /dev/null and b/resources/images/news/hotcity.png differ diff --git a/resources/images/news/hotnews.png b/resources/images/news/hotnews.png new file mode 100644 index 0000000000..ea00fa094c Binary files /dev/null and b/resources/images/news/hotnews.png differ diff --git a/resources/images/news/intrefete.png b/resources/images/news/intrefete.png new file mode 100644 index 0000000000..c0e17c335a Binary files /dev/null and b/resources/images/news/intrefete.png differ diff --git a/resources/images/news/jurnalulnational.png b/resources/images/news/jurnalulnational.png new file mode 100644 index 0000000000..c82d12ecad Binary files /dev/null and b/resources/images/news/jurnalulnational.png differ diff --git a/resources/images/news/kudika.png b/resources/images/news/kudika.png new file mode 100644 index 0000000000..22feb211ea Binary files /dev/null and b/resources/images/news/kudika.png differ diff --git a/resources/images/news/mediafax.png b/resources/images/news/mediafax.png new file mode 100644 index 0000000000..6514314e06 Binary files /dev/null and b/resources/images/news/mediafax.png differ diff --git a/resources/images/news/moneyro.png b/resources/images/news/moneyro.png new file mode 100644 index 0000000000..6112dddcb6 Binary files /dev/null and b/resources/images/news/moneyro.png differ diff --git a/resources/images/news/nationalgeoro.png b/resources/images/news/nationalgeoro.png new file mode 100644 index 0000000000..b268af14e2 Binary files /dev/null and b/resources/images/news/nationalgeoro.png differ diff --git a/resources/images/news/prosport.png b/resources/images/news/prosport.png new file mode 100644 index 0000000000..ca66f3cd51 Binary files /dev/null and b/resources/images/news/prosport.png differ diff --git a/resources/images/news/realitatea.png b/resources/images/news/realitatea.png new file mode 100644 index 0000000000..032c4d100e Binary files /dev/null and b/resources/images/news/realitatea.png differ diff --git a/resources/images/news/romanialibera.png b/resources/images/news/romanialibera.png new file mode 100644 index 0000000000..c680fd0fd3 Binary files /dev/null and b/resources/images/news/romanialibera.png differ diff --git a/resources/images/news/sfin.png b/resources/images/news/sfin.png new file mode 100644 index 0000000000..0aba2efc03 Binary files /dev/null and b/resources/images/news/sfin.png differ diff --git a/resources/images/news/standardmoney.png b/resources/images/news/standardmoney.png new file mode 100644 index 0000000000..101adcb6bc Binary files /dev/null and b/resources/images/news/standardmoney.png differ diff --git a/resources/images/news/superbebe.png b/resources/images/news/superbebe.png new file mode 100644 index 0000000000..db7e111a8c Binary files /dev/null and b/resources/images/news/superbebe.png differ diff --git a/resources/images/news/tabu.png b/resources/images/news/tabu.png new file mode 100644 index 0000000000..cbeeda53c5 Binary files /dev/null and b/resources/images/news/tabu.png differ diff --git a/resources/images/news/unica.png b/resources/images/news/unica.png new file mode 100644 index 0000000000..87e2d8d780 Binary files /dev/null and b/resources/images/news/unica.png differ diff --git a/resources/images/news/ziarulfinanciar.png b/resources/images/news/ziarulfinanciar.png new file mode 100644 index 0000000000..514d041a36 Binary files /dev/null and b/resources/images/news/ziarulfinanciar.png differ diff --git a/resources/recipes/7seri.recipe b/resources/recipes/7seri.recipe new file mode 100644 index 0000000000..af5757e97e --- /dev/null +++ b/resources/recipes/7seri.recipe @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +sapteseri.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class SapteSeri(BasicNewsRecipe): + title = u'Sapte Seri' + __author__ = u'Silviu Cotoar\u0103' + description = u'Sapte Seri' + publisher = u'Sapte Seri' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Oras,Distractie,Fun' + encoding = 'utf-8' + remove_empty_feeds = True + remove_javascript = True + cover_url = 'http://www.sapteseri.ro/Images/logo.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='h1', attrs={'id':'title'}) + , dict(name='div', attrs={'class':'mt10 mb10'}) + , dict(name='div', attrs={'class':'mb20 mt10'}) + , dict(name='div', attrs={'class':'mt5 mb20'}) + ] + + remove_tags = [ + dict(name='div', attrs={'id':['entityimgworking']}) + ] + + feeds = [ + (u'Ce se intampla azi in Bucuresti', u'http://www.sapteseri.ro/ro/feed/ce-se-intampla-azi/bucuresti/') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/aventurilapescuit.recipe b/resources/recipes/aventurilapescuit.recipe new file mode 100644 index 0000000000..e151e77518 --- /dev/null +++ b/resources/recipes/aventurilapescuit.recipe @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +aventurilapescuit.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AventuriLaPescuit(BasicNewsRecipe): + title = u'Aventuri La Pescuit' + __author__ = u'Silviu Cotoar\u0103' + description = 'Aventuri La Pescuit' + publisher = 'Aventuri La Pescuit' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Pescuit,Hobby' + encoding = 'utf-8' + cover_url = 'http://www.aventurilapescuit.ro/images/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'Article'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['right option']}) + , dict(name='iframe', attrs={'scrolling':['no']}) + ] + + remove_tags_after = [ + dict(name='iframe', attrs={'scrolling':['no']}) + ] + + feeds = [ + (u'Feeds', u'http://www.aventurilapescuit.ro/sections/rssread/1') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/chipro.recipe b/resources/recipes/chipro.recipe new file mode 100644 index 0000000000..e0d145ad19 --- /dev/null +++ b/resources/recipes/chipro.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +chip.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ChipRo(BasicNewsRecipe): + title = u'Chip Online' + __author__ = u'Silviu Cotoar\u0103' + description = 'Chip Online' + publisher = 'Chip Online' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,IT' + encoding = 'utf-8' + cover_url = 'http://www.chip.ro/images/logo.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='h2', attrs={'class':'contentheading clearfix'}) + , dict(name='span', attrs={'class':'createby'}) + , dict(name='div', attrs={'class':'article-content'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['sharemecompactbutton']}) + ,dict(name='div', attrs={'align':['left']}) + ,dict(name='div', attrs={'align':['center']}) + ,dict(name='th', attrs={'class':['pagenav_prev']}) + ,dict(name='table', attrs={'class':['pagenav']}) + ] + + feeds = [ + (u'Feeds', u'http://www.chip.ro/index.php?option=com_ninjarsssyndicator&feed_id=9&format=raw') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/csid.recipe b/resources/recipes/csid.recipe new file mode 100644 index 0000000000..bde304e513 --- /dev/null +++ b/resources/recipes/csid.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +csid.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class CSID(BasicNewsRecipe): + title = u'Ce se \u00eent\u00e2mpl\u0103 doctore?' + __author__ = u'Silviu Cotoar\u0103' + description = u'Ce se \u00eent\u00e2mpl\u0103 doctore?' + publisher = 'CSID' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei,Health,Beauty' + encoding = 'utf-8' + cover_url = 'http://www.csid.ro/images/default/csid.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'content floatleft'}) + ] + + remove_tags = [ + dict(name='div', attrs={'id':['article_links']}) + , dict(name='div', attrs={'id':['tags']}) + , dict(name='p', attrs={'id':['tags']}) + ] + + remove_tags_after = [ + dict(name='p', attrs={'id':['tags']}) + ] + + feeds = [ + (u'Feeds', u'http://www.csid.ro/rss/') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/curierulnational.recipe b/resources/recipes/curierulnational.recipe new file mode 100644 index 0000000000..32cba61b41 --- /dev/null +++ b/resources/recipes/curierulnational.recipe @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +curierulnational.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class CurierulNal(BasicNewsRecipe): + title = u'Curierul Na\u0163ional' + __author__ = u'Silviu Cotoar\u0103' + description = '' + publisher = 'Curierul Na\u0163ional' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.curierulnational.ro/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'col1'}) + , dict(name='img', attrs={'id':'placeholder'}) + ] + + remove_tags = [ + dict(name='p', attrs={'id':['alteArticole']}) + , dict(name='div', attrs={'id':['textSize']}) + , dict(name='ul', attrs={'class':['unit-rating']}) + , dict(name='div', attrs={'id':['comments']}) + ] + + remove_tags_after = [ + dict(name='ul', attrs={'class':'unit-rating'}) + ] + + feeds = [ + (u'Feeds', u'http://www.curierulnational.ro/feed.xml') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/descopera.recipe b/resources/recipes/descopera.recipe new file mode 100644 index 0000000000..71560c1e0e --- /dev/null +++ b/resources/recipes/descopera.recipe @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +descopera.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Descopera(BasicNewsRecipe): + title = u'Descoper\u0103' + __author__ = u'Silviu Cotoar\u0103' + description = 'E lumea ta' + publisher = 'Descopera' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Descopera' + encoding = 'utf-8' + cover_url = 'http://www.descopera.ro/images/header_images/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + + keep_only_tags = [ + dict(name='h1', attrs={'style':'font-family: Arial,Helvetica,sans-serif; font-size: 18px; color: rgb(51, 51, 51); font-weight: bold; margin: 10px 0pt; clear: both; float: left;width: 610px;'}) + ,dict(name='div', attrs={'style':'margin-right: 15px; margin-bottom: 15px; float: left;'}) + , dict(name='p', attrs={'id':'itemDescription'}) + ,dict(name='div', attrs={'id':'itemBody'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['tools']}) + , dict(name='div', attrs={'class':['share']}) + , dict(name='div', attrs={'class':['category']}) + , dict(name='div', attrs={'id':['comments']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'id':'comments'}) + ] + + feeds = [ + (u'Feeds', u'http://www.descopera.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/ecuisine.recipe b/resources/recipes/ecuisine.recipe new file mode 100644 index 0000000000..53631e0b14 --- /dev/null +++ b/resources/recipes/ecuisine.recipe @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +ecuisine.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class EcuisineRo(BasicNewsRecipe): + title = u'eCuisine' + __author__ = u'Silviu Cotoar\u0103' + description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti' + publisher = 'eCuisine' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Retete,Bucatarie' + encoding = 'utf-8' + cover_url = '' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'page-title'}) + , dict(name='div', attrs={'class':'content clearfix'}) + ] + + remove_tags = [ + dict(name='ul', attrs={'id':['recipe-tabs']}) + , dict(name='div', attrs={'class':['recipe-body-rating clearfix']}) + , dict(name='div', attrs={'class':['recipe-body-flags']}) + , dict(name='div', attrs={'id':['tweetmeme_button']}) + , dict(name='div', attrs={'class':['fbshare']}) + , dict(name='a', attrs={'class':['button-rounded']}) + , dict(name='div', attrs={'class':['recipe-body-related']}) + , dict(name='div', attrs={'class':['fbshare']}) + , dict(name='div', attrs={'class':['link-wrapper']}) + ] + + feeds = [ + (u'Feeds', u'http://www.ecuisine.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/egirl.recipe b/resources/recipes/egirl.recipe new file mode 100644 index 0000000000..b456323db9 --- /dev/null +++ b/resources/recipes/egirl.recipe @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +egirl.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class EgirlRo(BasicNewsRecipe): + title = u'egirl' + __author__ = u'Silviu Cotoar\u0103' + description = u'Necesar pentru tine' + publisher = u'egirl' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei' + encoding = 'utf-8' + cover_url = 'http://www.egirl.ro/images/egirlNou/logo_egirl.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'title_art'}) + , dict(name='div', attrs={'class':'content_style'}) + ] + + feeds = [ + (u'Feeds', u'http://www.egirl.ro/rss/egirl.xml') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/fhmro.recipe b/resources/recipes/fhmro.recipe new file mode 100644 index 0000000000..8a4bdeb4df --- /dev/null +++ b/resources/recipes/fhmro.recipe @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +fhm.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class FHMro(BasicNewsRecipe): + title = u'FHM Ro' + __author__ = u'Silviu Cotoar\u0103' + description = u'Pentru c\u0103 noi putem' + publisher = 'FHM' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Reviste' + encoding = 'utf-8' + cover_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'contentMainTitle'}) + , dict(name='div', attrs={'class':'entry'}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['ratingblock ']}) + , dict(name='a', attrs={'rel':['tag']}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['ratingblock ']}) + , dict(name='div', attrs={'class':['socialize-containter']}) + ] + + feeds = [ + (u'Feeds', u'http://www.fhm.ro/feed') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/go4it.recipe b/resources/recipes/go4it.recipe new file mode 100644 index 0000000000..ab875cb0de --- /dev/null +++ b/resources/recipes/go4it.recipe @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +go4it.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Go4ITro(BasicNewsRecipe): + title = u'go4it' + __author__ = u'Silviu Cotoar\u0103' + description = 'Gadgeturi, Lifestyle, Tehnologie' + publisher = 'go4it' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Reviste,Ziare,IT' + encoding = 'utf-8' + cover_url = 'http://www.go4it.ro/images/logo.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'subTitle clearfix'}) + , dict(name='div', attrs={'class':'story'}) + ] + + remove_tags = [ + dict(name='span', attrs={'class':['data']}) + , dict(name='a', attrs={'class':['comments']}) + ] + + feeds = [ + (u'Feeds', u'http://feeds2.feedburner.com/Go4itro-Stiri') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/hotcity.recipe b/resources/recipes/hotcity.recipe new file mode 100644 index 0000000000..befc3f15d9 --- /dev/null +++ b/resources/recipes/hotcity.recipe @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +hotcity.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class HotcityRo(BasicNewsRecipe): + title = u'Hotcity' + __author__ = u'Silviu Cotoar\u0103' + description = u'Cultura urban\u0103 feminin\u0103' + publisher = 'Hotcity' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste' + encoding = 'utf-8' + cover_url = 'http://www.hotcity.ro/i/bg_header.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'articol_title'}) + , dict(name='div', attrs={'class':'text'}) + ] + + feeds = [ + (u'Feeds', u'http://www.hotcity.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/intrefete.recipe b/resources/recipes/intrefete.recipe new file mode 100644 index 0000000000..e6471b6126 --- /dev/null +++ b/resources/recipes/intrefete.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +intrefete.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Intrefete(BasicNewsRecipe): + title = u'\u00centre fete' + __author__ = u'Silviu Cotoar\u0103' + description = u'Petrece ziua cu stil, afl\u0103 ce e nou \u00eentre fete' + publisher = u'Intre fete' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei' + encoding = 'utf-8' + cover_url = 'http://storage0.dms.mpinteractiv.ro/media/2/1401/16788/5878693/5/logo.jpg?width=300' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'article'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['author']}) + , dict(name='div', attrs={'class':['tags']}) + , dict(name='iframe', attrs={'scrolling':['no']}) + ] + + remove_tags_after = [ + dict(name='iframe', attrs={'scrolling':['no']}) + ] + + feeds = [ + (u'Feeds', u'http://www.intrefete.ro/rss/') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/kudika.recipe b/resources/recipes/kudika.recipe new file mode 100644 index 0000000000..dfc94f7456 --- /dev/null +++ b/resources/recipes/kudika.recipe @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +kudika.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Kudika(BasicNewsRecipe): + title = u'Kudika' + __author__ = u'Silviu Cotoar\u0103' + description = u'Revist\u0103 pentru femei' + publisher = 'Kudika' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei' + encoding = 'utf-8' + cover_url = 'http://img.kudika.ro/images/template/page-logo.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'header_recommend_article'}), + dict(name='div', attrs={'id':'intertext_women'}) + ] + + remove_tags = [ + dict(name='p', attrs={'class':['page_breadcrumbs']}) + , dict(name='div', attrs={'class':['standard']}) + , dict(name='div', attrs={'id':['recommend_allover']}) + ] + + feeds = [ (u'Feeds', u'http://www.kudika.ro/feed.xml') ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/nationalgeoro.recipe b/resources/recipes/nationalgeoro.recipe new file mode 100644 index 0000000000..a3c5727d38 --- /dev/null +++ b/resources/recipes/nationalgeoro.recipe @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +natgeo.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class NationalGeoRo(BasicNewsRecipe): + title = u'National Geographic RO' + __author__ = u'Silviu Cotoar\u0103' + description = u'S\u0103 avem grij\u0103 de planet\u0103' + publisher = 'National Geographic' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Reviste' + encoding = 'utf-8' + cover_url = 'http://wiki.benecke.com/images/c/c4/NatGeographic_Logo.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='h2', attrs={'class':'contentheading clearfix'}) + , dict(name='div', attrs={'class':'article-content'}) + + ] + + remove_tags = [ + dict(name='div', attrs={'class':['phocagallery']}) + ] + + feeds = [ + (u'Feeds', u'http://www.natgeo.ro/index.php?format=feed&type=rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/romanialibera.recipe b/resources/recipes/romanialibera.recipe new file mode 100644 index 0000000000..0cb401fdf4 --- /dev/null +++ b/resources/recipes/romanialibera.recipe @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +romanialibera.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class RomaniaLibera(BasicNewsRecipe): + title = u'Rom\u00e2nia Liber\u0103' + __author__ = u'Silviu Cotoar\u0103' + description = u'Rom\u00e2nia Liber\u0103' + publisher = u'Rom\u00e2nia Liber\u0103' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.romanialibera.ro/templates/lilac/images/sigla_1.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'articol'}) + ] + + remove_tags = [ + dict(name='div', attrs={'id':['art_actions']}) + , dict(name='div', attrs={'class':['stats']}) + , dict(name='div', attrs={'class':['data']}) + , dict(name='div', attrs={'class':['autori']}) + , dict(name='div', attrs={'class':['banda_explicatii_text']}) + , dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']}) + , dict(name='div', attrs={'class':['aceeasi_tema']}) + , dict(name='div', attrs={'class':['art_after_text']}) + , dict(name='div', attrs={'class':['navigare']}) + , dict(name='div', attrs={'id':['art_text_left']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':'art_after_text'}) + ] + + feeds = [ + (u'Feeds', u'http://www.romanialibera.ro/rss.xml') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/sfin.recipe b/resources/recipes/sfin.recipe new file mode 100644 index 0000000000..90c094a0c1 --- /dev/null +++ b/resources/recipes/sfin.recipe @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +sfin.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Sfin(BasicNewsRecipe): + title = u'S\u0103pt\u0103m\u00e2na Financiar\u0103' + __author__ = u'Silviu Cotoar\u0103' + description = 'SFIN' + publisher = 'SFIN' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri,Economie,Business' + encoding = 'utf-8' + cover_url = 'http://img.9am.ro/images/logo_surse/saptamana_financiara.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'col2ContentLeft'}) + , dict(name='div', attrs={'id':'contentArticol'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['infoArticol']}) + , dict(name='div', attrs={'class':['separator']}) + , dict(name='div', attrs={'class':['tags']}) + , dict(name='div', attrs={'id':['comments']}) + , dict(name='div', attrs={'class':'boxForm'}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':'tags'}) + ] + + feeds = [ + (u'Feeds', u'http://www.sfin.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/superbebe.recipe b/resources/recipes/superbebe.recipe new file mode 100644 index 0000000000..00433b6084 --- /dev/null +++ b/resources/recipes/superbebe.recipe @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +superbebe.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Superbebe(BasicNewsRecipe): + title = u'Superbebe' + __author__ = u'Silviu Cotoar\u0103' + description = 'Superbebe' + publisher = 'Superbebe' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Bebe,Mamici' + encoding = 'utf-8' + cover_url = 'http://www.superbebe.ro/images/superbebe.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'articol'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['info']}) + , dict(name='div', attrs={'class':['tags']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['tags']}) + ] + + feeds = [ + (u'Feeds', u'http://www.superbebe.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/swiatkindle.recipe b/resources/recipes/swiatkindle.recipe index a96b4d3ca6..62be2fe735 100644 --- a/resources/recipes/swiatkindle.recipe +++ b/resources/recipes/swiatkindle.recipe @@ -8,6 +8,8 @@ swiatkindle.pl import re +from calibre.web.feeds.news import BasicNewsRecipe + class swiatkindle(BasicNewsRecipe): title = u'Swiat Kindle' description = u'Blog o czytniku Amazon Kindle. Wersje, ksi\u0105\u017cki, kupowanie i korzystanie w Polsce' diff --git a/resources/recipes/tabu.recipe b/resources/recipes/tabu.recipe new file mode 100644 index 0000000000..d0ede613fd --- /dev/null +++ b/resources/recipes/tabu.recipe @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +tabu.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TabuRo(BasicNewsRecipe): + title = u'Tabu' + __author__ = u'Silviu Cotoar\u0103' + description = 'Cel mai curajos site de femei' + publisher = 'Tabu' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei' + encoding = 'utf-8' + cover_url = 'http://www.tabu.ro/img/tabu-logo2.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'Article'}), + ] + + remove_tags = [ + dict(name='div', attrs={'id':['advertisementArticle']}), + dict(name='div', attrs={'class':'voting_number'}), + dict(name='div', attrs={'id':'number_votes'}), + dict(name='div', attrs={'id':'rating_one'}), + dict(name='div', attrs={'class':'float: right;'}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'id':'comments'}), + ] + + feeds = [ + (u'Feeds', u'http://www.tabu.ro/rss_all.xml') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/unica.recipe b/resources/recipes/unica.recipe new file mode 100644 index 0000000000..b6be44a504 --- /dev/null +++ b/resources/recipes/unica.recipe @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +unica.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Unica(BasicNewsRecipe): + title = u'Unica' + __author__ = u'Silviu Cotoar\u0103' + description = 'Asa cum esti tu' + publisher = 'Unica' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei' + encoding = 'utf-8' + cover_url = 'http://www.unica.ro/fileadmin/images/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'sticky'}) + , dict(name='p', attrs={'class':'bodytext'}) + + ] + + remove_tags = [ + dict(name='div', attrs={'class':['top-links']}) + , dict(name='div', attrs={'id':['autor_name']}) + , dict(name='div', attrs={'class':['box-r']}) + , dict(name='div', attrs={'class':['category']}) + , dict(name='div', attrs={'class':['data']}) + ] + + remove_tags_after = [ + dict(name='ul', attrs={'class':'pager'}) + ] + + feeds = [ + (u'Feeds', u'http://www.unica.ro/rss.html') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 3559f13440..f1f2f87293 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -33,7 +33,7 @@ class HeuristicProcessor(object): self.any_multi_blank = re.compile(r'(\s*
]*>\s*
(\s*]*>\s*
)', re.IGNORECASE) + self.single_blank = re.compile(r'(\s*<(p|div)[^>]*>\s*(p|div)>)', re.IGNORECASE) self.scene_break_open = ''
self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]'
self.common_in_text_beginnings = u'[\w\'\"“‘‛]'
@@ -451,8 +451,8 @@ class HeuristicProcessor(object):
return html
def detect_whitespace(self, html):
- blanks_around_headings = re.compile(r'(?P ]*>\s* ]*>\s* ]*>\s* ]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W]((span|[ibu]|em|strong|font)>\s*)* ]*>\s* ]*>\s* ]*>\s*(<(span|[ibu]|em|strong|font)[^>]*>\s*)*.{1,100}?[^\W]((span|[ibu]|em|strong|font)>\s*)* ]*>\s* [^\"]*?);?">)(?P ]*>)', '\g]*>))', re.IGNORECASE)
empty_paragraph = '\n
(
]*>\s*){1,})(?P
', self.processed_html) - self.processed_html = re.sub(r'(?i)(?P]*>)\s*(?P
(]*>\s*){1,})', '\g'+'\g', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P ((blockquote|div)[^>]*>\s*){1,})(?P]*>)', '\g '+'\g ', self.processed_html) + self.processed_html = re.sub(r'(?i)(?P]*>)\s*(?P
(<(blockquote|div)[^>]*>\s*){1,})', '\g'+'\g', self.processed_html) def remove_random_bytes(self, html): diff --git a/src/calibre/gui2/comments_editor.py b/src/calibre/gui2/comments_editor.py index c7f7d8b94a..a594af739e 100644 --- a/src/calibre/gui2/comments_editor.py +++ b/src/calibre/gui2/comments_editor.py @@ -254,7 +254,8 @@ class EditorWidget(QWebView): # {{{ f = QFontInfo(QApplication.font(self)).pixelSize() style = 'font-size: %dpx;' % (f,) - for body in self.page().mainFrame().documentElement().findAll('body'): + # toList() is needed because PyQt on Debian is old/broken + for body in self.page().mainFrame().documentElement().findAll('body').toList(): body.setAttribute('style', style) self.page().setContentEditable(True)