From 647c24b706efd5a44b990e11b4e3951a23556ee9 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Mon, 14 Feb 2011 20:24:21 -0500
Subject: [PATCH 1/3] TXTZ Import plugin.

---
 src/calibre/customize/builtins.py | 100 ++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 1dd575f45b..87c83eff52 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -2,11 +2,13 @@ import os.path
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal '
 
-import textwrap, os, glob, functools
+import textwrap, os, glob, functools, re
+from calibre import guess_type
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
     MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
+from calibre.ebooks.oeb.base import OEB_IMAGES
 
 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@@ -82,6 +84,100 @@ class PML2PMLZ(FileTypePlugin):
 
         return of.name
 
+class TXT2TXTZ(FileTypePlugin):
+    '''
+    Import plugin that packs a TXT file together with the local images it
+    references (Textile or Markdown syntax) into a single TXTZ archive.
+
+    TXT files without any usable image reference are imported unchanged.
+    '''
+
+    name = 'TXT to TXTZ'
+    author = 'John Schember'
+    description = _('Create a TXTZ archive when a TXT file is imported '
+        'containing Markdown or Textile references to images. The referenced '
+        'images as well as the TXT file are added to the archive.')
+    version = numeric_version
+    file_types = set(['txt'])
+    supported_platforms = ['windows', 'osx', 'linux']
+    on_import = True
+
+    def _is_local_image(self, path, base_dir):
+        '''
+        Return True if ``path`` is a non-empty, relative path whose guessed
+        mime type is in OEB_IMAGES and which exists relative to ``base_dir``.
+        '''
+        return bool(path) and not os.path.isabs(path) and \
+            guess_type(path)[0] in OEB_IMAGES and \
+            os.path.exists(os.path.join(base_dir, path))
+
+    def _get_image_references(self, txt, base_dir):
+        '''
+        Collect the image paths referenced from ``txt``.
+
+        Textile images, Markdown inline images and Markdown reference style
+        images are recognised. Only references accepted by
+        _is_local_image() are kept.
+
+        :param txt: Contents of the TXT file.
+        :param base_dir: Directory that relative image paths are resolved against.
+        :return: List of unique image paths as written in ``txt``, in no
+                 particular order.
+        '''
+        candidates = set()
+
+        # Textile
+        for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
+            candidates.add(m.group('path'))
+
+        # Markdown inline
+        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
+            candidates.add(m.group('path'))
+
+        # Markdown reference: map every id to its target first, then resolve
+        # the ids used by image references.
+        refs = {}
+        for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
+            if m.group('id') and m.group('path'):
+                refs[m.group('id')] = m.group('path')
+        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
+            candidates.add(refs.get(m.group('id'), None))
+
+        # The set has already removed duplicates, so every candidate is
+        # checked against the filesystem only once.
+        return [p for p in candidates if self._is_local_image(p, base_dir)]
+
+    def run(self, path_to_ebook):
+        '''
+        Return the path of the file that should be imported.
+
+        If the TXT file references local images, a temporary TXTZ archive
+        holding the TXT file and those images is created and its path is
+        returned; otherwise ``path_to_ebook`` is returned unchanged.
+        '''
+        with open(path_to_ebook, 'rb') as ebf:
+            txt = ebf.read()
+        base_dir = os.path.dirname(path_to_ebook)
+        images = self._get_image_references(txt, base_dir)
+
+        if not images:
+            # No images so just import the TXT file.
+            return path_to_ebook
+
+        # Create TXTZ and put file plus images inside of it.
+        import zipfile
+        of = self.temporary_file('_plugin_txt2txtz.txtz')
+        txtz = zipfile.ZipFile(of.name, 'w')
+        try:
+            txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
+            for image in images:
+                txtz.write(os.path.join(base_dir, image), image)
+        finally:
+            # Close even when a write fails so the archive handle is not leaked.
+            txtz.close()
+
+        return of.name
+
 # }}}
 
 # Metadata reader plugins {{{
@@ -516,7 +612,7 @@ from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 
-plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
         KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX,
         Unmanifested, Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
         NiceBooksCovers]

From f62a2e1a28a26923c8accd8c55beabaaaa0f839b Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 15 Feb 2011 08:12:39 -0700
Subject: [PATCH 2/3] Fix #8993 (Trim trailing spaces from titles after editing titles)

---
 src/calibre/gui2/library/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py
index e839ed4b9c..909afd01df 100644
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@@ -825,7 +825,7 @@ class BooksModel(QAbstractTableModel): # {{{
                     return False
                 val = int(value.toInt()[0]) if column == 'rating' else \
                       value.toDate() if column in ('timestamp', 'pubdate') else \
-                      unicode(value.toString())
+                      unicode(value.toString()).strip()
                 id = self.db.id(row)
                 books_to_refresh = set([id])
                 if column == 'rating':

From 040be5fe031c854edb70528dba078735e8a78451 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 15 Feb 2011 09:14:28 -0700
Subject: [PATCH 3/3] Various Romanian news sources by Silviu Cotoara

---
 resources/recipes/adevarul.recipe         | 50 +++++++++++++++++++++
 resources/recipes/capital.recipe          | 44 ++++++++++++++++++
 resources/recipes/catavencu.recipe        | 53 ++++++++++++++++++++++
 resources/recipes/gandul.recipe           | 47 ++++++++++++++++++++
 resources/recipes/hotnews.recipe          | 46 +++++++++++++++++++
 resources/recipes/jurnalulnational.recipe | 54 +++++++++++++++++++++++
 resources/recipes/mediafax.recipe         | 52 ++++++++++++++++++++++
 resources/recipes/moneyro.recipe          | 54 +++++++++++++++++++++++
 resources/recipes/prosport.recipe         | 49 ++++++++++++++++++++
 resources/recipes/realitatea.recipe       | 45 +++++++++++++++++++
 resources/recipes/standardmoney.recipe    | 46 +++++++++++++++++++
 resources/recipes/ziarulfinanciar.recipe  | 45 +++++++++++++++++++
 12 files changed, 585 insertions(+)
 create mode 100644 resources/recipes/adevarul.recipe
 create mode 100644 resources/recipes/capital.recipe
 create mode 100644 resources/recipes/catavencu.recipe
 create mode 100644 resources/recipes/gandul.recipe
 create mode 100644 resources/recipes/hotnews.recipe
 create mode 100644 resources/recipes/jurnalulnational.recipe
 create mode 100644 resources/recipes/mediafax.recipe
 create mode 100644 resources/recipes/moneyro.recipe
 create mode 100644 resources/recipes/prosport.recipe
 create mode 100644 resources/recipes/realitatea.recipe
 create mode 100644 resources/recipes/standardmoney.recipe
 create mode 100644 resources/recipes/ziarulfinanciar.recipe

diff --git a/resources/recipes/adevarul.recipe b/resources/recipes/adevarul.recipe
new file mode 100644
index 0000000000..ea0f2826ce
--- /dev/null
+++ b/resources/recipes/adevarul.recipe
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+adevarul.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for adevarul.ro; articles come from the RSS feed listed in ``feeds``.
+class Adevarul(BasicNewsRecipe):
+    title = u'Adev\u0103rul'
+    language = 'ro'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    publisher = 'Adevarul'
+    category = 'Ziare,Stiri,Romania'
+    oldest_article = 5
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf-8'
+    remove_javascript = True
+    cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})  # presumably the elements holding the article; verify against the live site
+        ,dict(name='div', attrs={'class':'bd'})
+    ]
+
+
+    remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
+        ,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
+        ,dict(name='form', attrs={'id':'bb-comment-create-form'})
+    ]
+
+    remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]  # same form selector as the last remove_tags entry
+
+    feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
diff --git a/resources/recipes/capital.recipe b/resources/recipes/capital.recipe
new file mode 100644
index 0000000000..b8fc839a06
--- /dev/null
+++ b/resources/recipes/capital.recipe
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+capital.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for capital.ro; articles come from the RSS feed listed in ``feeds``.
+class Capital(BasicNewsRecipe):
+    title = 'Capital'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    remove_javascript = True
+    publisher = 'Capital'
+    cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [ dict(name='div', attrs={'class':'single one_article'})  # presumably the element holding the article; verify against the live site
+    ]
+
+    remove_tags = [ dict(name='div', attrs={'class':'single_details'})
+        , dict(name='div', attrs={'class':'tx-addoceansbanners-pi1'})
+    ]
+
+    feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html') ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
diff --git a/resources/recipes/catavencu.recipe b/resources/recipes/catavencu.recipe
new file mode 100644
index 0000000000..5f056825b5
--- /dev/null
+++ b/resources/recipes/catavencu.recipe
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+catavencu.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for catavencu.ro; articles come from the RSS feed listed in ``feeds``.
+class Catavencu(BasicNewsRecipe):
+    title = u'Academia Ca\u0163avencu'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = 'Tagma cum laude'
+    publisher = 'Catavencu'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare'
+    encoding = 'utf-8'
+    cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the element holding the article; verify against the live site
+        dict(name='ul', attrs={'class':'articles'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['tools']})
+        , dict(name='div', attrs={'class':['share']})
+        , dict(name='div', attrs={'class':['category']})
+        , dict(name='div', attrs={'id':['comments']})
+    ]
+
+    remove_tags_after = [  # targets the same div id ("comments") as the last remove_tags entry
+        dict(name='div', attrs={'id':'comments'})
+    ]
+
+    feeds = [
+        (u'Feeds', u'http://catavencu.ro/feed/rss')
+    ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
diff --git a/resources/recipes/gandul.recipe b/resources/recipes/gandul.recipe
new file mode 100644
index 0000000000..774843f568
--- /dev/null
+++ b/resources/recipes/gandul.recipe
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+gandul.info
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for gandul.info; articles come from the RSS feed listed in ``feeds``.
+class Gandul(BasicNewsRecipe):
+    title = u'G\u00E2ndul'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = 'Gandul'
+    description = 'Cotidian Online'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the element holding the article; verify against the live site
+        dict(name='div', attrs={'class':'article'})
+    ]
+
+    remove_tags = [
+        dict(name='a', attrs={'class':'photo'})
+        , dict(name='div', attrs={'class':'ad'})
+    ]
+
+    feeds = [
+        (u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
+    ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
diff --git a/resources/recipes/hotnews.recipe b/resources/recipes/hotnews.recipe
new file mode 100644
index 0000000000..44b6f0f57b
--- /dev/null
+++ b/resources/recipes/hotnews.recipe
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+hotnews.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for hotnews.ro; articles come from the two RSS feeds listed in ``feeds``.
+class Hotnews(BasicNewsRecipe):
+    title = 'Hotnews'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    publisher = 'Hotnews'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://www.hotnews.ro/images/new/logo.gif'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the elements holding the article; verify against the live site
+        dict(name='h1', attrs={'class':'title'})
+        ,dict(name='div', attrs={'id':'articleContent'})
+    ]
+
+    feeds = [ (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate')
+        ,(u'English', u'http://www.hotnews.ro/rss/english')
+    ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+
diff --git a/resources/recipes/jurnalulnational.recipe b/resources/recipes/jurnalulnational.recipe
new file mode 100644
index 0000000000..ea3ba9c734
--- /dev/null
+++ b/resources/recipes/jurnalulnational.recipe
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+jurnalul.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for jurnalul.ro; articles come from the nine RSS feeds listed in ``feeds``.
+class JurnalulNational(BasicNewsRecipe):
+    title = u'Jurnalul Na\u0163ional'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    publisher = 'Jurnalul National'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://www.jurnalul.ro/images/sigla.png'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the elements holding the article; verify against the live site
+        dict(name='h1', attrs={'class':'h3 art_title'})
+        ,dict(name='div', attrs={'class':'only_text'})
+    ]
+
+    feeds = [  # (section title, feed URL) pairs, one per site section
+        (u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html')
+        ,(u'Special', u'http://www.jurnalul.ro/rss/special-3001.html')
+        ,(u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html')
+        ,(u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html')
+        ,(u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html')
+        ,(u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html')
+        ,(u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html')
+        ,(u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html')
+        ,(u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html')
+    ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+
diff --git a/resources/recipes/mediafax.recipe b/resources/recipes/mediafax.recipe
new file mode 100644
index 0000000000..3d8a5b34a3
--- /dev/null
+++ b/resources/recipes/mediafax.recipe
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+mediafax.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for mediafax.ro; articles come from the RSS feed listed in ``feeds``.
+class Mediafax(BasicNewsRecipe):
+    title = 'Mediafax'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    publisher = 'Mediafax'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+
+    }
+
+    keep_only_tags = [  # presumably the element holding the article; verify against the live site
+        dict(name='div', attrs={'class':'news tabs-container'})
+    ]
+
+    remove_tags = [
+        dict(name='ul', attrs={'class':['CategoryNews']})
+        ,dict(name='div', attrs={'class':['read']})
+    ]
+
+    remove_tags_after = [ dict(name='div', attrs={'class':'cmsItemViews'}) ]
+
+
+    feeds = [
+        (u'Actualitate', u'http://www.mediafax.ro/rss/')
+    ]
+
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
diff --git a/resources/recipes/moneyro.recipe b/resources/recipes/moneyro.recipe
new file mode 100644
index 0000000000..4a280207ed
--- /dev/null
+++ b/resources/recipes/moneyro.recipe
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+money.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for money.ro; articles come from the RSS feed listed in ``feeds``.
+class MoneyRo(BasicNewsRecipe):
+    title = 'Money Ro'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    publisher = 'MoneyRo'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    remove_javascript = True
+    cover_url = 'http://assets.moneyweb.ro/images/logo_money.jpg'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [ dict(name='div', attrs={'id':'titluArticol'})  # presumably the elements holding the article; verify against the live site
+        , dict(name='img', attrs={'id':'objImage'})
+        , dict(name='div', attrs={'class':'leftColumnArticle'})
+    ]
+
+    remove_tags_after = [ dict(name='div', attrs={'id':'articleTags'}) ]  # the same div#articleTags is also listed in remove_tags below
+
+    remove_tags = [ dict(name='div', attrs={'id':'ads'})
+        , dict(name='div', attrs={'id':'aus'})
+        , dict(name='div', attrs={'id':'bb-comment-create-form'})
+        , dict(name='div', attrs={'id':'articleTags'})
+        , dict(name='div', attrs={'class':'breadcrumb'})
+    ]
+
+    feeds = [(u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml') ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+
diff --git a/resources/recipes/prosport.recipe b/resources/recipes/prosport.recipe
new file mode 100644
index 0000000000..d1be8bd1f2
--- /dev/null
+++ b/resources/recipes/prosport.recipe
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+prosport.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for prosport.ro; articles come from the RSS feed listed in ``feeds``.
+class Prosport(BasicNewsRecipe):
+    title = 'Prosport'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = 'Prosport'
+    description = u'\u0218tiri Sportive din Rom\u00e2nia'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania,Sport'
+    encoding = 'utf-8'
+    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the elements holding the article; verify against the live site
+        dict(name='h1', attrs={'class':'a-title'})
+        ,dict(name='div', attrs={'class':'a-entry'})
+    ]
+
+    remove_tags = [ dict(name='div', attrs={'class':'utils'})
+        ,dict(name='div', attrs={'class':'g-slide'})
+    ]
+
+
+    feeds = [ (u'\u0218tiri', u'http://www.prosport.ro/rss.xml')]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+
diff --git a/resources/recipes/realitatea.recipe b/resources/recipes/realitatea.recipe
new file mode 100644
index 0000000000..ae5b7d8688
--- /dev/null
+++ b/resources/recipes/realitatea.recipe
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+realitatea.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for realitatea.net; articles come from the RSS feed listed in ``feeds``.
+class Realitatea(BasicNewsRecipe):
+    title = 'Realitatea'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = 'Realitatea'
+    description = u'\u0218tiri din Rom\u00e2nia'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://assets.realitatea.ro/images/logo.jpg'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the elements holding the article; verify against the live site
+        dict(name='div', attrs={'class':'articleTitle '})  # NOTE(review): class value ends in a space; presumably copied from the site's markup, confirm
+        ,dict(name='div', attrs={'class':'articleBody'})
+    ]
+
+    remove_tags = [ dict(name='div', attrs={'id':'aus'}) ]
+    feeds = [ (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml') ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+
diff --git a/resources/recipes/standardmoney.recipe b/resources/recipes/standardmoney.recipe
new file mode 100644
index 0000000000..b02abde447
--- /dev/null
+++ b/resources/recipes/standardmoney.recipe
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+standard.money.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for standard.money.ro; articles come from the RSS feed listed in ``feeds``.
+class StandardMoneyRo(BasicNewsRecipe):
+    title = 'Standard Money Ro'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = 'Standard Money'
+    description = 'Portal de Business'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the elements holding the article; verify against the live site
+        dict(name='h1', attrs={'class':'post-title'})
+        , dict(name='div', attrs={'class':'content_post'})
+    ]
+
+    feeds = [
+        (u'Actualitate', u'http://standard.money.ro/feed')
+    ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+
diff --git a/resources/recipes/ziarulfinanciar.recipe b/resources/recipes/ziarulfinanciar.recipe
new file mode 100644
index 0000000000..0b10d997b0
--- /dev/null
+++ b/resources/recipes/ziarulfinanciar.recipe
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+zf.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+# Calibre news recipe for zf.ro; articles come from the RSS feed listed in ``feeds``.
+class ZiarulFinanciar(BasicNewsRecipe):
+    title = 'Ziarul Financiar'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = u'\u0218tiri din Business'
+    publisher = 'Ziarul Financiar'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Stiri,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'
+
+    conversion_options = {  # reuses description, category, language and publisher from above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [  # presumably the element holding the article; verify against the live site
+        dict(name='div', attrs={'class':'article'})
+    ]
+
+    feeds = [
+        (u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/')
+    ]
+
+    def preprocess_html(self, soup):  # returns the result of the inherited adeify_images() for ``soup``
+        return self.adeify_images(soup)
+
+
+