diff --git a/resources/images/news/cotidianul.png b/resources/images/news/cotidianul.png new file mode 100644 index 0000000000..2e57dbde54 Binary files /dev/null and b/resources/images/news/cotidianul.png differ diff --git a/resources/images/news/ele.png b/resources/images/news/ele.png new file mode 100644 index 0000000000..82f66b5caa Binary files /dev/null and b/resources/images/news/ele.png differ diff --git a/resources/images/news/felicia.png b/resources/images/news/felicia.png new file mode 100644 index 0000000000..4bc1fd35d8 Binary files /dev/null and b/resources/images/news/felicia.png differ diff --git a/resources/images/news/financiarul.png b/resources/images/news/financiarul.png new file mode 100644 index 0000000000..1d91a72a34 Binary files /dev/null and b/resources/images/news/financiarul.png differ diff --git a/resources/images/news/imperatortravel.png b/resources/images/news/imperatortravel.png new file mode 100644 index 0000000000..c459759ed0 Binary files /dev/null and b/resources/images/news/imperatortravel.png differ diff --git a/resources/images/news/monden.png b/resources/images/news/monden.png new file mode 100644 index 0000000000..fcf8ad42ae Binary files /dev/null and b/resources/images/news/monden.png differ diff --git a/resources/images/news/promotor.png b/resources/images/news/promotor.png new file mode 100644 index 0000000000..a479cf135b Binary files /dev/null and b/resources/images/news/promotor.png differ diff --git a/resources/images/news/timesnewroman.png b/resources/images/news/timesnewroman.png new file mode 100644 index 0000000000..6ba02939b4 Binary files /dev/null and b/resources/images/news/timesnewroman.png differ diff --git a/resources/recipes/cotidianul.recipe b/resources/recipes/cotidianul.recipe new file mode 100644 index 0000000000..f00196532c --- /dev/null +++ b/resources/recipes/cotidianul.recipe @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +cotidianul.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Cotidianul(BasicNewsRecipe): + title = u'Cotidianul' + __author__ = u'Silviu Cotoar\u0103' + description = u'' + publisher = u'Cotidianul' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.cotidianul.ro/images/cotidianul.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'class':'titlu'}) + , dict(name='div', attrs={'class':'gallery clearfix'}) + , dict(name='div', attrs={'align':'justify'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['space']}) + , dict(name='div', attrs={'id':['title_desc']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['space']}) + , dict(name='span', attrs={'class':['date']}) + ] + + feeds = [ + (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/ele.recipe b/resources/recipes/ele.recipe new file mode 100644 index 0000000000..ea8954366b --- /dev/null +++ b/resources/recipes/ele.recipe @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +ele.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Ele(BasicNewsRecipe): + title = u'Ele' + __author__ = u'Silviu Cotoar\u0103' + description = u'Dezv\u0103luie ceea ce e\u015fti' + publisher = u'Ele' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Femei' + encoding = 'utf-8' + cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='h1', attrs={'class':'article_title'}) + , dict(name='div', attrs={'class':'article_text'}) + ] + + feeds = [ + (u'Feeds', u'http://www.ele.ro/rss_must_read') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/felicia.recipe b/resources/recipes/felicia.recipe new file mode 100644 index 0000000000..0772e38494 --- /dev/null +++ b/resources/recipes/felicia.recipe @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +revistafelicia.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Felicia(BasicNewsRecipe): + title = u'Revista Felicia' + __author__ = u'Silviu Cotoar\u0103' + description = u'O revist\u0103 pentru sufletul t\u0103u' + publisher = u'Revista Felicia' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste' + encoding = 'utf-8' + cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'header'}) + , dict(name='div', attrs={'id':'contentArticol'}) + ] + + remove_tags = [ + dict(name='img',attrs={'src':['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}) + , dict(name='div',attrs={'class':['content']}) + ] + + feeds = [ + (u'Feeds', u'http://www.revistafelicia.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/financiarul.recipe b/resources/recipes/financiarul.recipe new file mode 100644 index 0000000000..807f771408 --- /dev/null +++ b/resources/recipes/financiarul.recipe @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +financiarul.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Financiarul(BasicNewsRecipe): + title = u'Financiarul' + __author__ = u'Silviu Cotoar\u0103' + description = u'FIN.ro' + publisher = u'Financiarul' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.financiarul.com/templates/default/images/logo.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'col2ContentLeftL'}) + ] + + remove_tags = [ + dict(name='div',attrs={'class':['infoArticol']}) + , dict(name='ul', attrs={'class':'navSectiuni'}) + , dict(name='div', attrs={'class':'separator separatorTop'}) + , dict(name='div', attrs={'class':'infoArticol infoArticolBottom'}) + , dict(name='ul', attrs={'class':['related']}) + , dict(name='div', attrs={'class':['slot panel300 panelGri300 panelGri300s panelGri300sm']}) + ] + + remove_tags_after = [ + dict(name='ul', attrs={'class':['related']}) + ] + + feeds = [ + (u'Feeds', u'http://www.financiarul.com/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/imperatortravel.recipe b/resources/recipes/imperatortravel.recipe new file mode 100644 index 0000000000..2b6d323bf5 --- /dev/null +++ b/resources/recipes/imperatortravel.recipe @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +imperatortravel.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Imperatortravel(BasicNewsRecipe): + title = u'Imperator Travel' + __author__ = u'Silviu Cotoar\u0103' + description = u'C\u0103l\u0103torii' + publisher = u'Imperator Travel' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri,Turism,Calatorii' + encoding = 'utf-8' + cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'class':'article first_main_article'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['meta']}) + , dict(name='body', attrs={'class':['transparent_widget ff3 win Locale_en_US']}) + , dict(name='div', attrs={'class':['connect_widget']}) + , dict(name='ul', attrs={'class':['similar-posts']}) + ] + + remove_tags_after = [ + dict(name='ul', attrs={'class':['similar-posts']}) + ] + + feeds = [ + (u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/monden.recipe b/resources/recipes/monden.recipe new file mode 100644 index 0000000000..22764ffe47 --- /dev/null +++ b/resources/recipes/monden.recipe @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +monden.info +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Monden(BasicNewsRecipe): + title = u'Monden' + __author__ = u'Silviu Cotoar\u0103' + description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102' + publisher = u'Monden' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri,Muzica' + encoding = 'utf-8' + cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'id':'content'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['postAuthor']}) + , dict(name='div', attrs={'class':['postLike']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['postLike']}) + ] + + feeds = [ + (u'Feeds', u'http://www.monden.info/feed/') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/promotor.recipe b/resources/recipes/promotor.recipe new file mode 100644 index 0000000000..11a8499d7b --- /dev/null +++ b/resources/recipes/promotor.recipe @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +promotor.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Promotor(BasicNewsRecipe): + title = u'Promotor' + __author__ = u'Silviu Cotoar\u0103' + description = u'Auto-moto' + publisher = u'Promotor' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,TV,Auto' + encoding = 'utf-8' + cover_url = 'http://www.promotor.ro/images/logo_promotor.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} + .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} + .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ + dict(name='div', attrs={'class':'casetatitluarticol'}) + , dict(name='div', attrs={'style':'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'}) + , dict(name='div', attrs={'class':'textb'}) + , dict(name='div', attrs={'class':'contentarticol'}) + ] + + remove_tags = [ + dict(name='td', attrs={'class':['connect_widget_vertical_center connect_widget_button_cell']}) + , dict(name='div', attrs={'class':['etichetagry']}) + , dict(name='span', attrs={'class':['textb']}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':['etichetagry']}) + , dict(name='span', attrs={'class':['textb']}) + ] + + feeds = [ + (u'Feeds', u'http://www.promotor.ro/rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/resources/recipes/timesnewroman.recipe b/resources/recipes/timesnewroman.recipe new file mode 100644 index 0000000000..12672aa888 --- /dev/null +++ b/resources/recipes/timesnewroman.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +timesnewroman.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TimesNewRoman(BasicNewsRecipe): + title = u'Times New Roman' + __author__ = u'Silviu Cotoar\u0103' + description = u'Cotidian independent de umor voluntar' + publisher = u'Times New Roman' + oldest_article = 25 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Fun' + encoding = 'utf-8' + cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'id':'page'}) + ] + + remove_tags = [ + dict(name='p', attrs={'class':['articleinfo']}) + , dict(name='div',attrs={'class':['vergefacebooklike']}) + , dict(name='div', attrs={'class':'cleared'}) + ] + + remove_tags_after = [ + dict(name='div', attrs={'class':'cleared'}) + ] + + feeds = [ + (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 0ae640113a..c5bac936b5 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -25,7 +25,7 @@ class DRMError(ValueError): class ParserError(ValueError): pass -BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm', +BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb'] diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 1c49eb9b35..3c256fda7a 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin): name = 'TXT Input' author = 'John Schember' description = 'Convert TXT files to HTML' - file_types = set(['txt', 'txtz']) + file_types = set(['txt', 'txtz', 'text']) options = set([ OptionRecommendation(name='paragraph_type', recommended_value='auto',