diff --git a/resources/images/news/cotidianul.png b/resources/images/news/cotidianul.png new file mode 100644 index 0000000000..2e57dbde54 Binary files /dev/null and b/resources/images/news/cotidianul.png differ diff --git a/resources/images/news/ele.png b/resources/images/news/ele.png new file mode 100644 index 0000000000..82f66b5caa Binary files /dev/null and b/resources/images/news/ele.png differ diff --git a/resources/images/news/felicia.png b/resources/images/news/felicia.png new file mode 100644 index 0000000000..4bc1fd35d8 Binary files /dev/null and b/resources/images/news/felicia.png differ diff --git a/resources/images/news/financiarul.png b/resources/images/news/financiarul.png new file mode 100644 index 0000000000..1d91a72a34 Binary files /dev/null and b/resources/images/news/financiarul.png differ diff --git a/resources/images/news/imperatortravel.png b/resources/images/news/imperatortravel.png new file mode 100644 index 0000000000..c459759ed0 Binary files /dev/null and b/resources/images/news/imperatortravel.png differ diff --git a/resources/images/news/monden.png b/resources/images/news/monden.png new file mode 100644 index 0000000000..fcf8ad42ae Binary files /dev/null and b/resources/images/news/monden.png differ diff --git a/resources/images/news/promotor.png b/resources/images/news/promotor.png new file mode 100644 index 0000000000..a479cf135b Binary files /dev/null and b/resources/images/news/promotor.png differ diff --git a/resources/images/news/timesnewroman.png b/resources/images/news/timesnewroman.png new file mode 100644 index 0000000000..6ba02939b4 Binary files /dev/null and b/resources/images/news/timesnewroman.png differ diff --git a/resources/recipes/cotidianul.recipe b/resources/recipes/cotidianul.recipe new file mode 100644 index 0000000000..f00196532c --- /dev/null +++ b/resources/recipes/cotidianul.recipe @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +cotidianul.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Cotidianul(BasicNewsRecipe): + title = u'Cotidianul' + __author__ = u'Silviu Cotoar\u0103' + description = u'' + publisher = u'Cotidianul' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.cotidianul.ro/images/cotidianul.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'class':'titlu'}) + , dict(name='div', attrs={'class':'gallery clearfix'}) + , dict(name='div', attrs={'align':'justify'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['space']}) + , dict(name='div', attrs={'id':['title_desc']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['space']}) + , dict(name='span', attrs={'class':['date']}) + ] + + feeds = [ + (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/ele.recipe b/resources/recipes/ele.recipe new file mode 100644 index 0000000000..ea8954366b --- /dev/null +++ b/resources/recipes/ele.recipe @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +ele.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Ele(BasicNewsRecipe): + title = u'Ele' + __author__ = u'Silviu Cotoar\u0103' + description = u'Dezv\u0103luie ceea ce e\u015fti' + publisher = u'Ele' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Femei' + encoding = 'utf-8' + cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='h1', attrs={'class':'article_title'}) + , dict(name='div', attrs={'class':'article_text'}) + ] + + feeds = [ + (u'Feeds', u'http://www.ele.ro/rss_must_read') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/felicia.recipe b/resources/recipes/felicia.recipe new file mode 100644 index 0000000000..0772e38494 --- /dev/null +++ b/resources/recipes/felicia.recipe @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +revistafelicia.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Felicia(BasicNewsRecipe): + title = u'Revista Felicia' + __author__ = u'Silviu Cotoar\u0103' + description = u'O revist\u0103 pentru sufletul t\u0103u' + publisher = u'Revista Felicia' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste' + encoding = 'utf-8' + cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'header'}) + , dict(name='div', attrs={'id':'contentArticol'}) + ] + + remove_tags = [ + dict(name='img',attrs={'src':['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}) + , dict(name='div',attrs={'class':['content']}) + ] + + feeds = [ + (u'Feeds', u'http://www.revistafelicia.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/financiarul.recipe b/resources/recipes/financiarul.recipe new file mode 100644 index 0000000000..807f771408 --- /dev/null +++ b/resources/recipes/financiarul.recipe @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +financiarul.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Financiarul(BasicNewsRecipe): + title = u'Financiarul' + __author__ = u'Silviu Cotoar\u0103' + description = u'FIN.ro' + publisher = u'Financiarul' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.financiarul.com/templates/default/images/logo.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'col2ContentLeftL'}) + ] + + remove_tags = [ + dict(name='div',attrs={'class':['infoArticol']}) + , dict(name='ul', attrs={'class':'navSectiuni'}) + , dict(name='div', attrs={'class':'separator separatorTop'}) + , dict(name='div', attrs={'class':'infoArticol infoArticolBottom'}) + , dict(name='ul', attrs={'class':['related']}) + , dict(name='div', attrs={'class':['slot panel300 panelGri300 panelGri300s panelGri300sm']}) + ] + + remove_tags_after = [ + dict(name='ul', attrs={'class':['related']}) + ] + + feeds = [ + (u'Feeds', u'http://www.financiarul.com/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/imperatortravel.recipe b/resources/recipes/imperatortravel.recipe new file mode 100644 index 0000000000..2b6d323bf5 --- /dev/null +++ b/resources/recipes/imperatortravel.recipe @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +imperatortravel.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Imperatortravel(BasicNewsRecipe): + title = u'Imperator Travel' + __author__ = u'Silviu Cotoar\u0103' + description = u'C\u0103l\u0103torii' + publisher = u'Imperator Travel' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri,Turism,Calatorii' + encoding = 'utf-8' + cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'class':'article first_main_article'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['meta']}) + , dict(name='body', attrs={'class':['transparent_widget ff3 win Locale_en_US']}) + , dict(name='div', attrs={'class':['connect_widget']}) + , dict(name='ul', attrs={'class':['similar-posts']}) + ] + + remove_tags_after = [ + dict(name='ul', attrs={'class':['similar-posts']}) + ] + + feeds = [ + (u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/lanacion.recipe b/resources/recipes/lanacion.recipe index 05e777ec67..425aa9b193 100644 --- a/resources/recipes/lanacion.recipe +++ b/resources/recipes/lanacion.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2010, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' lanacion.com.ar ''' @@ -19,9 +19,10 @@ class Lanacion(BasicNewsRecipe): language = 'es_AR' publication_type = 'newspaper' remove_empty_feeds = True - masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif' - extra_css = """ h1{font-family: Georgia,serif} - h2{color: #626262} + masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif' + extra_css = """ + h1{font-family: Georgia,serif} + h2{color: #626262; font-weight: normal; font-size: 1.1em} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em; display: block} .notaFecha{color: #808080} @@ -37,47 +38,78 @@ class Lanacion(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})] + keep_only_tags = [dict(name='div', attrs={'id':'content'})] + remove_tags = [ dict(name='div' , attrs={'class':'notaComentario floatFix noprint' }) ,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']}) - ,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] }) - ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']}) + ,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] }) + ,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']}) ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input']) ] + remove_tags_after = dict(attrs={'class':['tags','nota-destacado']}) remove_attributes = ['height','width','visible','onclick','data-count','name'] feeds = [ - (u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' ) - ,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' ) - ,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' ) - ,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' ) - ,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' ) - ,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' ) - ,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' ) - ,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' ) - ,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' ) - ,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' ) - ,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' ) - ,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' ) - ,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' ) - ,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' ) - ,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' ) - ,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' ) - ,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' ) - ,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' ) - ,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' ) - ,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' ) - ,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' ) - ,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' ) - ,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' ) - ,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' ) - ,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' ) - ,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' ) + (u'Ultimas Noticias' , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2' ) + ,(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' ) + ,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' ) + ,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' ) + ,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' ) + ,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' ) + ,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' ) + ,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' ) + ,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' ) + ,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' ) + ,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' ) + ,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' ) + ,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' ) + ,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' ) + ,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' ) + ,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' ) + ,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' ) + ,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' ) + ,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312') + ,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363') + ,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348') + ,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734') + ,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373') + ,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344') + ,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380') + ,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276') ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.set_debug_redirects(True) + br.set_debug_responses(True) + br.set_debug_http(True) + return br + + def get_article_url(self, article): + link = BasicNewsRecipe.get_article_url(self,article) + if link.startswith('http://blogs.lanacion') and not link.endswith('/'): + return None + return link + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - return self.adeify_images(soup) + for item in soup.findAll('a'): + limg = item.find('img') + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + if limg: + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + return soup diff --git a/resources/recipes/monden.recipe b/resources/recipes/monden.recipe new file mode 100644 index 0000000000..22764ffe47 --- /dev/null +++ b/resources/recipes/monden.recipe @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +monden.info +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Monden(BasicNewsRecipe): + title = u'Monden' + __author__ = u'Silviu Cotoar\u0103' + description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102' + publisher = u'Monden' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri,Muzica' + encoding = 'utf-8' + cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'id':'content'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['postAuthor']}) + , dict(name='div', attrs={'class':['postLike']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['postLike']}) + ] + + feeds = [ + (u'Feeds', u'http://www.monden.info/feed/') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/oakland_north.recipe b/resources/recipes/oakland_north.recipe new file mode 100644 index 0000000000..0ad165be40 --- /dev/null +++ b/resources/recipes/oakland_north.recipe @@ -0,0 +1,23 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class AdvancedUserRecipe1299640653(BasicNewsRecipe): + title = u'Oakland North' + oldest_article = 30 + max_articles_per_feed = 100 + + language = 'en' + __author__ = 'noah' + description = 'Oakland North' + category = 'news' + no_stylesheets = True + + masthead_url = 'http://oaklandnorth.net/wp-content/themes/oaklandnorth/images/masthead.png' + + keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b(?!-)', re.IGNORECASE)})] + + remove_tags_after = [dict(name='p', attrs={'class':'post-postscript'})] + + remove_tags = [dict(name='p', attrs={'class':'post-postscript'})] + + feeds = [(u'All Headlines', u'http://oaklandnorth.net/feed/')] diff --git a/resources/recipes/promotor.recipe b/resources/recipes/promotor.recipe new file mode 100644 index 0000000000..11a8499d7b --- /dev/null +++ b/resources/recipes/promotor.recipe @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +promotor.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Promotor(BasicNewsRecipe): + title = u'Promotor' + __author__ = u'Silviu Cotoar\u0103' + description = u'Auto-moto' + publisher = u'Promotor' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,TV,Auto' + encoding = 'utf-8' + cover_url = 'http://www.promotor.ro/images/logo_promotor.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'class':'casetatitluarticol'}) + , dict(name='div', attrs={'style':'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'}) + , dict(name='div', attrs={'class':'textb'}) + , dict(name='div', attrs={'class':'contentarticol'}) + ] + + remove_tags = [ + dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']}) + , dict(name='div', attrs={'class':['etichetagry']}) + , dict(name='span', attrs={'class':['textb']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['etichetagry']}) + , dict(name='span', attrs={'class':['textb']}) + ] + + feeds = [ + (u'Feeds', u'http://www.promotor.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/timesnewroman.recipe b/resources/recipes/timesnewroman.recipe new file mode 100644 index 0000000000..12672aa888 --- /dev/null +++ b/resources/recipes/timesnewroman.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +timesnewroman.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TimesNewRoman(BasicNewsRecipe): + title = u'Times New Roman' + __author__ = u'Silviu Cotoar\u0103' + description = u'Cotidian independent de umor voluntar' + publisher = u'Times New Roman' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Fun' + encoding = 'utf-8' + cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'page'}) + ] + + remove_tags = [ + dict(name='p', attrs={'class':['articleinfo']}) + , dict(name='div',attrs={'class':['vergefacebooklike']}) + , dict(name='div', attrs={'class':'cleared'}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':'cleared'}) + ] + + feeds = [ + (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/src/calibre/devices/__init__.py b/src/calibre/devices/__init__.py index 0d62a8f619..1918a36cc8 100644 --- a/src/calibre/devices/__init__.py +++ b/src/calibre/devices/__init__.py @@ -30,7 +30,6 @@ def strftime(epoch, zone=time.gmtime): def get_connected_device(): from calibre.customize.ui import device_plugins from calibre.devices.scanner import DeviceScanner - import uuid dev = None scanner = DeviceScanner() scanner.scan() @@ -48,7 +47,7 @@ def get_connected_device(): for d in connected_devices: try: - d.open(str(uuid.uuid4())) + d.open() except: continue else: diff --git a/src/calibre/devices/prs500/cli/main.py b/src/calibre/devices/prs500/cli/main.py index 8a73f3fa23..6d568b01a2 100755 --- a/src/calibre/devices/prs500/cli/main.py +++ b/src/calibre/devices/prs500/cli/main.py @@ -6,7 +6,7 @@ Provides a command-line and optional graphical interface to the SONY Reader PRS- For usage information run the script. """ -import StringIO, sys, time, os, uuid +import StringIO, sys, time, os from optparse import OptionParser from calibre import __version__, __appname__ @@ -213,7 +213,7 @@ def main(): for d in connected_devices: try: - d.open(str(uuid.uuid4())) + d.open(None) except: continue else: diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 0ae640113a..c5bac936b5 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -25,7 +25,7 @@ class DRMError(ValueError): class ParserError(ValueError): pass -BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm', +BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb'] diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 0db9b153df..fc338da692 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -13,6 +13,7 @@ from urlparse import urlparse from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text from calibre.ebooks import ConversionError +from calibre.utils.ordered_dict import OrderedDict def XPath(x): try: @@ -95,10 +96,8 @@ class DetectStructure(object): self.log.exception('Failed to mark chapter') def create_level_based_toc(self): - if self.opts.level1_toc is None: - return - for item in self.oeb.spine: - self.add_leveled_toc_items(item) + if self.opts.level1_toc is not None: + self.add_leveled_toc_items() def create_toc_from_chapters(self): counter = self.oeb.toc.next_play_order() @@ -145,49 +144,57 @@ class DetectStructure(object): return text, href - def add_leveled_toc_items(self, item): - level1 = XPath(self.opts.level1_toc)(item.data) - level1_order = [] - document = item - + def add_leveled_toc_items(self): + added = OrderedDict() + added2 = OrderedDict() counter = 1 - if level1: - added = {} - for elem in level1: + for document in self.oeb.spine: + previous_level1 = list(added.itervalues())[-1] if added else None + previous_level2 = list(added2.itervalues())[-1] if added2 else None + + for elem in XPath(self.opts.level1_toc)(document.data): text, _href = self.elem_to_link(document, elem, counter) counter += 1 if text: node = self.oeb.toc.add(text, _href, play_order=self.oeb.toc.next_play_order()) - level1_order.append(node) added[elem] = node #node.add(_('Top'), _href) - if self.opts.level2_toc is not None: - added2 = {} - level2 = list(XPath(self.opts.level2_toc)(document.data)) - for elem in level2: + + if self.opts.level2_toc is not None and added: + for elem in XPath(self.opts.level2_toc)(document.data): level1 = None for item in document.data.iterdescendants(): - if item in added.keys(): + if item in added: level1 = added[item] - elif item == elem and level1 is not None: + elif item == elem: + if level1 is None: + if previous_level1 is None: + break + level1 = previous_level1 text, _href = self.elem_to_link(document, elem, counter) counter += 1 if text: added2[elem] = level1.add(text, _href, play_order=self.oeb.toc.next_play_order()) - if self.opts.level3_toc is not None: - level3 = list(XPath(self.opts.level3_toc)(document.data)) - for elem in level3: + break + + if self.opts.level3_toc is not None and added2: + for elem in XPath(self.opts.level3_toc)(document.data): level2 = None for item in document.data.iterdescendants(): - if item in added2.keys(): + if item in added2: level2 = added2[item] - elif item == elem and level2 is not None: + elif item == elem: + if level2 is None: + if previous_level2 is None: + break + level2 = previous_level2 text, _href = \ self.elem_to_link(document, elem, counter) counter += 1 if text: level2.add(text, _href, - play_order=self.oeb.toc.next_play_order()) + play_order=self.oeb.toc.next_play_order()) + break diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py index 3dcaf0fcb1..25640586db 100755 --- a/src/calibre/ebooks/rtf2xml/tokenize.py +++ b/src/calibre/ebooks/rtf2xml/tokenize.py @@ -46,7 +46,8 @@ class Tokenize: def __remove_uc_chars(self, startchar, token): for i in xrange(startchar, len(token)): - if token[i] == " ": + #handle the case of an uc char with a terminating blank before ansi char + if token[i] == " " and self.__uc_char: continue elif self.__uc_char: self.__uc_char -= 1 diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 1c49eb9b35..3c256fda7a 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin): name = 'TXT Input' author = 'John Schember' description = 'Convert TXT files to HTML' - file_types = set(['txt', 'txtz']) + file_types = set(['txt', 'txtz', 'text']) options = set([ OptionRecommendation(name='paragraph_type', recommended_value='auto', diff --git a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py new file mode 100644 index 0000000000..ddf8e162e8 --- /dev/null +++ b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' +__license__ = 'GPL v3' + + +from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox, \ + QListWidget, QAbstractItemView +from PyQt4 import QtGui + +class ChoosePluginToolbarsDialog(QDialog): + + def __init__(self, parent, plugin, locations): + QDialog.__init__(self, parent) + self.locations = locations + + self.setWindowTitle( + _('Add "%s" to toolbars or menus')%plugin.name) + + self._layout = QVBoxLayout(self) + self.setLayout(self._layout) + + self._header_label = QLabel( + _('Select the toolbars and/or menus to add %s to:') % + plugin.name) + self._layout.addWidget(self._header_label) + + self._locations_list = QListWidget(self) + self._locations_list.setSelectionMode(QAbstractItemView.MultiSelection) + sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Preferred, + QtGui.QSizePolicy.Minimum) + sizePolicy.setHorizontalStretch(0) + sizePolicy.setVerticalStretch(0) + self._locations_list.setSizePolicy(sizePolicy) + for key, text in locations: + self._locations_list.addItem(text) + self._layout.addWidget(self._locations_list) + + self._footer_label = QLabel( + _('You can also customise the plugin locations ' + 'using Preferences -> Customise the toolbar')) + self._layout.addWidget(self._footer_label) + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | + QDialogButtonBox.Cancel) + button_box.accepted.connect(self.accept) + button_box.rejected.connect(self.reject) + self._layout.addWidget(button_box) + self.resize(self.sizeHint()) + + def selected_locations(self): + selected = [] + for row in self._locations_list.selectionModel().selectedRows(): + selected.append(self.locations[row.row()]) + return selected + diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py index acf42fee16..85221766f2 100644 --- a/src/calibre/gui2/preferences/plugins.py +++ b/src/calibre/gui2/preferences/plugins.py @@ -16,9 +16,10 @@ from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin disable_plugin, plugin_customization, add_plugin, \ remove_plugin from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \ - question_dialog + question_dialog, gprefs from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.icu import lower +from calibre.utils.ordered_dict import OrderedDict class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{ @@ -281,6 +282,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self._plugin_model.populate() self._plugin_model.reset() self.changed_signal.emit() + self.check_for_add_to_toolbars(plugin) info_dialog(self, _('Success'), _('Plugin {0} successfully installed under ' ' {1} plugins. You may have to restart calibre ' @@ -342,6 +344,37 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): plugin.name + _(' cannot be removed. It is a ' 'builtin plugin. Try disabling it instead.')).exec_() + def check_for_add_to_toolbars(self, plugin): + from calibre.gui2.preferences.toolbar import ConfigWidget + from calibre.customize import InterfaceActionBase + + if not isinstance(plugin, InterfaceActionBase): + return + + all_locations = OrderedDict(ConfigWidget.LOCATIONS) + plugin_action = plugin.load_actual_plugin(self.gui) + installed_actions = OrderedDict([ + (key, list(gprefs.get('action-layout-'+key, []))) + for key in all_locations]) + + # If already installed in a GUI container, do nothing + for action_names in installed_actions.itervalues(): + if plugin_action.name in action_names: + return + + allowed_locations = [(key, text) for key, text in + all_locations.iteritems() if key + not in plugin_action.dont_add_to] + if not allowed_locations: + return # This plugin doesn't want to live in the GUI + + from calibre.gui2.dialogs.choose_plugin_toolbars import ChoosePluginToolbarsDialog + d = ChoosePluginToolbarsDialog(self, plugin_action, allowed_locations) + if d.exec_() == d.Accepted: + for key, text in d.selected_locations(): + installed_actions = list(gprefs.get('action-layout-'+key, [])) + installed_actions.append(plugin_action.name) + gprefs['action-layout-'+key] = tuple(installed_actions) if __name__ == '__main__': from PyQt4.Qt import QApplication