diff --git a/recipes/aktualne.cz.recipe b/recipes/aktualne.cz.recipe
new file mode 100644
index 0000000000..cd2dcc5f09
--- /dev/null
+++ b/recipes/aktualne.cz.recipe
@@ -0,0 +1,69 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
+
+class aktualneRecipe(BasicNewsRecipe):  # calibre news recipe for aktualne.cz; the lines up to keep_only_tags are pure configuration
+ __author__ = 'bubak'
+ title = u'aktualne.cz'
+ publisher = u'Centrum holdings'
+ description = 'aktuálně.cz'
+ oldest_article = 1  # presumably an age limit in days (calibre BasicNewsRecipe setting) - confirm
+ max_articles_per_feed = 20
+
+ feeds = [  # (section title, feed URL) pairs
+ (u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
+ (u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
+ (u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
+ (u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
+ (u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
+ (u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
+ ]
+
+
+ language = 'cs'
+ cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
+ remove_javascript = True
+ no_stylesheets = True
+
+ remove_attributes = []
+ remove_tags_before = dict(name='h1', attrs={'class':['titulek-clanku']})
+ filter_regexps = [r'img.aktualne.centrum.cz']  # pattern naming the site's image host
+ remove_tags = [dict(name='div', attrs={'id':['social-bookmark']}),  # tag specifications (name + attribute filters) to discard
+ dict(name='div', attrs={'class':['box1', 'svazane-tagy']}),
+ dict(name='div', attrs={'class':'itemcomment id0'}),
+ dict(name='div', attrs={'class':'hlavicka'}),
+ dict(name='div', attrs={'class':'hlavni-menu'}),
+ dict(name='div', attrs={'class':'top-standard-brand-obal'}),
+ dict(name='div', attrs={'class':'breadcrumb'}),
+ dict(name='div', attrs={'id':'start-standard'}),
+ dict(name='div', attrs={'id':'forum'}),
+ dict(name='span', attrs={'class':'akce'}),
+ dict(name='span', attrs={'class':'odrazka vetsi'}),
+ dict(name='div', attrs={'class':'boxP'}),
+ dict(name='div', attrs={'class':'box2'})]
+ preprocess_regexps = [  # NOTE(review): both pattern literals below are cut off in this copy of the patch - restore them from the upstream recipe
+ (re.compile(r'
'),
+ (re.compile(r'
')]
+
+ keep_only_tags = []  # empty: no whitelist of tags is configured
+
+ visited_urls = {}  # URLs accepted so far (dict used as a set); class-level, so shared by all instances
+ def get_article_url(self, article):
+     # Return the article URL, or None when the entry has no link or its URL was already accepted.
+     url = BasicNewsRecipe.get_article_url(self, article)
+     if not url or url in self.visited_urls:  # a missing link must never reach the string concatenation below
+         self.log.debug('Ignoring duplicate or missing URL: %s' % url)
+         return None
+     self.visited_urls[url] = True
+     self.log.debug('Accepting: ' + url)
+     return url
+
+ def encoding(self, source):
+     # Decode a fetched page: UTF-8 when its URL contains 'blog.aktualne', ISO-8859-2 otherwise.
+     enc = 'utf-8' if source.newurl.find('blog.aktualne') >= 0 else 'iso-8859-2'
+     message = 'Called encoding ' + enc + " " + str(source.newurl)
+     self.log.debug(message)
+     # 'replace' is passed as the error handler of the decode call
+     return source.decode(enc, 'replace')
+
diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe
new file mode 100644
index 0000000000..c2576191dd
--- /dev/null
+++ b/recipes/antyweb.recipe
@@ -0,0 +1,48 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AntywebRecipe(BasicNewsRecipe):  # calibre news recipe for the Polish blog Antyweb; the lines up to feeds are configuration
+ encoding = 'utf-8'
+ __license__ = 'GPL v3'  # NOTE(review): the __author__ literal on the next line is broken across two lines in this copy of the patch
+ __author__ = u'Artur Stachecki
'
+ language = 'pl'
+ version = 1
+ title = u'Antyweb'
+ category = u'News'
+ description = u'Blog o internecie i nowych technologiach'
+ cover_url=''  # empty string: no cover image URL is given
+ remove_empty_feeds= True
+ auto_cleanup = False
+ no_stylesheets=True
+ use_embedded_content = False
+ oldest_article = 1  # presumably an age limit in days (calibre BasicNewsRecipe setting) - confirm
+ max_articles_per_feed = 100
+ remove_javascript = True
+ simultaneous_downloads = 3
+
+ keep_only_tags =[]  # built up by the append() calls below: article title and article body
+ keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'}))
+ keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'}))
+
+
+ remove_tags =[]  # built up by the append() calls below
+ remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'}))
+ remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'}))
+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}))  # NOTE(review): the value looks like inline CSS; this may need to match 'style' rather than 'class' - verify against the site's HTML
+ remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}))
+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'}))
+
+
+ extra_css = '''
+ body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+ '''
+
+ feeds = [  # (section title, feed URL) pairs; note the feed:// scheme in the URL
+ (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
+ ]
+ def preprocess_html(self, soup):  # unlink anchors: each <a> that reports a .string is replaced by that string
+     for anchor in soup.findAll('a'):
+         label = anchor.string
+         if label is not None:
+             anchor.replaceWith(label)
+     return soup
diff --git a/recipes/bankier_pl.recipe b/recipes/bankier_pl.recipe
new file mode 100644
index 0000000000..8a68d844b3
--- /dev/null
+++ b/recipes/bankier_pl.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel '
+
+'''
+bankier.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class bankier(BasicNewsRecipe):
+ # calibre news recipe for the Polish finance portal Bankier.pl; this part is pure configuration.
+ title = u'Bankier.pl'
+ description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
+ language = 'pl'
+ __author__ = 'teepel '
+ masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
+ INDEX = 'http://bankier.pl/'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ simultaneous_downloads = 5
+ remove_empty_feeds = True
+ remove_javascript = True
+ no_stylesheets = True
+
+ keep_only_tags = [dict(name='div', attrs={'align': 'left'})]
+ remove_tags = [
+     dict(name='table', attrs={'cellspacing': '2'}),
+     dict(name='div', attrs={'align': 'center'}),
+     dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}),
+     # disabled: dict(name='a', attrs={'target': '_blank'})
+     # disabled: dict(name='br', attrs={'clear': 'all'})
+ ]
+
+ feeds = [
+     (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
+     (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
+     (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
+     (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
+     (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
+     (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
+ ]
+ def print_version(self, url):  # map an article URL to the print page addressed by the article id
+     # Work on the path only, so the result does not depend on how many dots the host name contains.
+     path = url.split('://', 1)[-1].partition('/')[2]
+     # The id is the text after the last '-' in the part of the path that precedes its first '.'.
+     article_id = path.partition('.')[0].rpartition('-')[2]
+     return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
+
diff --git a/recipes/blesk.recipe b/recipes/blesk.recipe
new file mode 100644
index 0000000000..7eff4c42d0
--- /dev/null
+++ b/recipes/blesk.recipe
@@ -0,0 +1,55 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
+
+class bleskRecipe(BasicNewsRecipe):  # calibre news recipe for blesk.cz; the lines up to keep_only_tags are pure configuration
+ __author__ = 'bubak'
+ title = u'Blesk'
+ publisher = u''
+ description = 'blesk.cz'
+ oldest_article = 1  # presumably an age limit in days (calibre BasicNewsRecipe setting) - confirm
+ max_articles_per_feed = 20
+ use_embedded_content = False
+
+ feeds = [  # (section title, feed URL) pairs
+ (u'Zprávy', u'http://www.blesk.cz/rss/7'),
+ (u'Blesk', u'http://www.blesk.cz/rss/1'),
+ (u'Sex a tabu', u'http://www.blesk.cz/rss/2'),
+ (u'Celebrity', u'http://www.blesk.cz/rss/5'),
+ (u'Cestování', u'http://www.blesk.cz/rss/12')
+ ]
+
+
+ #encoding = 'iso-8859-2'
+ language = 'cs'
+ cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
+ remove_javascript = True
+ no_stylesheets = True
+ extra_css = """
+ """
+
+ remove_attributes = []
+ remove_tags_before = dict(name='div', attrs={'id':['boxContent']})
+ remove_tags_after = dict(name='div', attrs={'class':['artAuthors']})
+ remove_tags = [dict(name='div', attrs={'class':['link_clanek']}),  # tag specifications (name + attribute filters) to discard
+ dict(name='div', attrs={'id':['partHeader']}),
+ dict(name='div', attrs={'id':['top_bottom_box', 'lista_top']})]
+ preprocess_regexps = [(re.compile(r'')]  # NOTE(review): pattern and replacement are missing in this copy of the patch (unbalanced parentheses) - restore from the upstream recipe
+
+ keep_only_tags = [dict(name='div', attrs={'class':'articleContent'})]
+
+ visited_urls = {}  # URLs accepted so far (dict used as a set); class-level, so shared by all instances
+ def get_article_url(self, article):
+     # Return the article URL, or None when the entry has no link or its URL was already accepted.
+     url = BasicNewsRecipe.get_article_url(self, article)
+     if not url or url in self.visited_urls:  # a missing link must never reach the string concatenation below
+         self.log.debug('Ignoring duplicate or missing URL: %s' % url)
+         return None
+     self.visited_urls[url] = True
+     self.log.debug('Accepting: ' + url)
+     return url
+
+
+
+
diff --git a/recipes/buchreport.recipe b/recipes/buchreport.recipe
new file mode 100644
index 0000000000..5ed34d1ee8
--- /dev/null
+++ b/recipes/buchreport.recipe
@@ -0,0 +1,45 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
+
+class Buchreport(BasicNewsRecipe) :  # calibre news recipe for the German trade paper Buchreport
+ __author__ = 'a.peter'
+ __copyright__ = 'a.peter'
+ __license__ = 'GPL v3'
+ description = 'Buchreport'
+ version = 4
+ title = u'Buchreport'
+ timefmt = ' [%d.%m.%Y]'
+ encoding = 'cp1252'
+ language = 'de'
+
+ # h3 uses font-weight: normal ('regular' is not a valid CSS font-weight keyword).
+ extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \
+ article, articledate, articledescription { text-align: left; } \
+ h1 { text-align: left; font-size: 140%; font-weight: bold; } \
+ h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \
+ h3 { text-align: left; font-size: 100%; font-weight: normal; font-style: italic; } \
+ h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }'
+
+ oldest_article = 7.0
+ no_stylesheets = True
+ remove_javascript = True
+ use_embedded_content = False
+ publication_type = 'newspaper'
+
+ remove_tags_before = dict(name='h2')
+ remove_tags_after = [  # the same <div> style is also listed in remove_tags below
+ dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
+ ]
+ remove_tags = [
+ dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
+ dict(name='iframe'),
+ dict(name='img')
+ ]
+
+ feeds = [  # (section title, feed URL) pairs
+ (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
+ ]
+
+ def get_masthead_url(self):  # returns the fixed URL of the Buchreport logo image
+     return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'
diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe
index badca48733..a61c32aa42 100644
--- a/recipes/business_standard.recipe
+++ b/recipes/business_standard.recipe
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic
'
+__copyright__ = '2009-2012, Darko Miletic '
'''
www.business-standard.com
'''
@@ -14,10 +14,12 @@ class BusinessStandard(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
+ auto_cleanup = False
encoding = 'cp1252'
publisher = 'Business Standard Limited'
category = 'news, business, money, india, world'
language = 'en_IN'
+ masthead_url = 'http://feeds.business-standard.com/images/logo_08.jpg'
conversion_options = {
'comments' : description
@@ -26,7 +28,7 @@ class BusinessStandard(BasicNewsRecipe):
,'publisher' : publisher
,'linearize_tables': True
}
- keep_only_tags=[dict(attrs={'class':'TableClas'})]
+ #keep_only_tags=[dict(name='td', attrs={'class':'TableClas'})]
remove_tags = [
dict(name=['object','link','script','iframe','base','meta'])
,dict(attrs={'class':'rightDiv2'})
@@ -45,3 +47,8 @@ class BusinessStandard(BasicNewsRecipe):
,(u'Management & Mktg' , u'http://feeds.business-standard.com/rss/7_0.xml' )
,(u'Opinion' , u'http://feeds.business-standard.com/rss/5_0.xml' )
]
+
+ def print_version(self, url):  # the print page is addressed by the last two '/'-separated parts of the article URL
+     head, _, tp = url.rpartition('/')
+     autono = head.rpartition('/')[2]
+     return 'http://www.business-standard.com/india/printpage.php?autono=' + autono + '&tp=' + tp
diff --git a/recipes/ceska_pozice.recipe b/recipes/ceska_pozice.recipe
new file mode 100644
index 0000000000..478f6823b9
--- /dev/null
+++ b/recipes/ceska_pozice.recipe
@@ -0,0 +1,68 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ceskaPoziceRecipe(BasicNewsRecipe):
+ # calibre news recipe for ceskapozice.cz (Czech); this part is pure configuration.
+ title = 'Česká pozice'
+ description = 'Česká pozice'
+ __author__ = 'bubak'
+ language = 'cs'
+ domain = 'http://www.ceskapozice.cz'  # prepended to the pager's next-page href in append_page
+ cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
+
+ oldest_article = 2
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ remove_javascript = True
+ no_stylesheets = True
+
+ feeds = [
+     ('Všechny články', 'http://www.ceskapozice.cz/rss.xml'),
+     ('Domov', 'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
+     ('Chrono', 'http://www.ceskapozice.cz/chrono/feed'),
+     ('Evropa', 'http://www.ceskapozice.cz/taxonomy/term/17/feed'),
+ ]
+
+ keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
+ remove_tags = [
+     dict(name='div', attrs={'class': ['block-ad', 'region region-content-ad']}),
+     dict(name='ul', attrs={'class': 'links'}),
+     dict(name='div', attrs={'id': ['comments', 'back-to-top']}),
+     dict(name='div', attrs={'class': ['next-page', 'region region-content-ad']}),
+     dict(name='cite'),
+ ]
+
+ visited_urls = {}  # URLs accepted so far (dict used as a set); class-level, so shared by all instances
+ def get_article_url(self, article):
+     # Return the article URL, or None when the entry has no link or its URL was already accepted.
+     url = BasicNewsRecipe.get_article_url(self, article)
+     if not url or url in self.visited_urls:  # a missing link must never reach the string concatenation below
+         self.log.debug('Ignoring duplicate or missing URL: %s' % url)
+         return None
+     self.visited_urls[url] = True
+     self.log.debug('Accepting: ' + url)
+     return url
+
+ def preprocess_html(self, soup):  # merges follow-up pages of the article into soup, then returns it
+ self.append_page(soup, soup.body, 3)  # continuation text is inserted into <body> at child index 3
+ return soup
+
+ def append_page(self, soup, appendtag, position):  # fetch the page linked from soup's pager and insert its text into appendtag at index position
+ pager = soup.find('div', attrs={'class':'paging-bottom'})
+ if pager:  # NOTE(review): the block nesting below cannot be recovered from this flattened copy - restore the indentation from the upstream recipe
+ nextbutton = pager.find('li', attrs={'class':'pager-next'})
+ if nextbutton:
+ nexturl = self.domain + nextbutton.a['href']  # the href is appended to self.domain, so it is presumably site-relative
+ soup2 = self.index_to_soup(nexturl)
+ texttag = soup2.find('div', attrs={'class':'main-body'})  # NOTE(review): if find() returns None here, the next line raises AttributeError
+ for it in texttag.findAll('div', attrs={'class':'region region-content-ad'}):
+ it.extract()
+ for it in texttag.findAll('cite'):
+ it.extract()
+ newpos = len(texttag.contents)
+ self.append_page(soup2, texttag, newpos)  # recurse: any further page is inserted at the end of this page's text
+ texttag.extract()
+ appendtag.insert(position, texttag)
+ pager.extract()  # remove the pager element from the tree
+
diff --git a/recipes/ceske_noviny.recipe b/recipes/ceske_noviny.recipe
new file mode 100644
index 0000000000..10dd16689d
--- /dev/null
+++ b/recipes/ceske_noviny.recipe
@@ -0,0 +1,30 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ceskenovinyRecipe(BasicNewsRecipe):
+ # calibre news recipe for ceskenoviny.cz (Czech); only the first feed in the list is enabled.
+ title = 'České Noviny'
+ description = 'ceskenoviny.cz'
+ __author__ = 'bubak'
+ language = 'cs'
+ cover_url = 'http://i4.cn.cz/grafika/cn_logo-print.gif'
+
+ oldest_article = 1
+ max_articles_per_feed = 20
+ remove_javascript = True
+ no_stylesheets = True
+ remove_attributes = []
+ # NOTE(review): this pattern names another site's image host (aktualne.centrum.cz) - confirm it is wanted here.
+ filter_regexps = [r'img.aktualne.centrum.cz']
+ keep_only_tags = [dict(name='div', attrs={'id': 'clnk'})]
+
+ feeds = [
+     ('Domácí', 'http://www.ceskenoviny.cz/sluzby/rss/domov.php'),
+     # ('Hlavní události', 'http://www.ceskenoviny.cz/sluzby/rss/index.php'),
+     # ('Přehled zpráv', 'http://www.ceskenoviny.cz/sluzby/rss/zpravy.php'),
+     # ('Ze světa', 'http://www.ceskenoviny.cz/sluzby/rss/svet.php'),
+     # ('Kultura', 'http://www.ceskenoviny.cz/sluzby/rss/kultura.php'),
+     # ('IT', 'http://www.ceskenoviny.cz/sluzby/rss/pocitace.php'),
+ ]
+
diff --git a/recipes/cesky_rozhlas_6.recipe b/recipes/cesky_rozhlas_6.recipe
new file mode 100644
index 0000000000..eca32af02c
--- /dev/null
+++ b/recipes/cesky_rozhlas_6.recipe
@@ -0,0 +1,26 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class cro6Recipe(BasicNewsRecipe):
+ # calibre news recipe for the Český rozhlas 6 export feed on rozhlas.cz (Czech).
+ title = 'Český rozhlas 6'
+ description = 'Český rozhlas 6'
+ __author__ = 'bubak'
+ language = 'cs'
+ cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'
+
+ feeds = [('Český rozhlas 6', 'http://www.rozhlas.cz/export/cro6/')]
+
+ oldest_article = 1
+ max_articles_per_feed = 20
+ remove_javascript = True
+ no_stylesheets = True
+ remove_attributes = []
+
+ # Tag filters: the div with id 'article' is kept, the tags listed in remove_tags are discarded.
+ keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
+ remove_tags = [
+     dict(name='div', attrs={'class': ['audio-play-all', 'poradHeaders', 'actions']}),
+     dict(name='p', attrs={'class': ['para-last']}),
+ ]
diff --git a/recipes/demagog.cz.recipe b/recipes/demagog.cz.recipe
new file mode 100644
index 0000000000..7d89af41bd
--- /dev/null
+++ b/recipes/demagog.cz.recipe
@@ -0,0 +1,39 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
+
+class demagogRecipe(BasicNewsRecipe):  # calibre news recipe for demagog.cz; the whole class is configuration
+ __author__ = 'bubak'
+ title = u'Demagog.cz'
+ publisher = u''
+ description = 'demagog.cz'
+ oldest_article = 6  # presumably an age limit in days (calibre BasicNewsRecipe setting) - confirm
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ remove_empty_feeds = True
+
+ feeds = [  # (section title, feed URL) pairs
+ (u'Aktuality', u'http://demagog.cz/rss')
+ ]
+
+
+ #encoding = 'iso-8859-2'
+ language = 'cs'
+ cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
+ remove_javascript = True
+ no_stylesheets = True
+ extra_css = """
+ .vyrok_suhrn{margin-top:50px; }
+ .vyrok{margin-bottom:30px; }
+ """
+
+ remove_tags = [dict(name='a', attrs={'class':'vyrok_odovodnenie_tgl'}),  # tag specifications (name + attribute filters) to discard
+ dict(name='img', attrs={'class':'vyrok_fotografia'})]
+ remove_tags_before = dict(name='h1')
+ remove_tags_after = dict(name='div', attrs={'class':'vyrok_text_after'})  # NOTE(review): the preprocess_regexps entry below is cut off in this copy (empty group, replacement split over two lines) - restore from the upstream recipe
+ preprocess_regexps = [(re.compile(r'()', re.DOTALL|re.IGNORECASE), lambda match: '\1
')]
+
+
+
+
diff --git a/recipes/denik.cz.recipe b/recipes/denik.cz.recipe
new file mode 100644
index 0000000000..2ccf8caa40
--- /dev/null
+++ b/recipes/denik.cz.recipe
@@ -0,0 +1,36 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ceskyDenikRecipe(BasicNewsRecipe):
+ # calibre news recipe for denik.cz (Czech): the 'Z domova' feed plus the Prague regional feed.
+ title = 'denik.cz'
+ description = 'Český deník'
+ publisher = ''
+ __author__ = 'bubak'
+ language = 'cs'
+ cover_url = 'http://g.denik.cz/images/loga/denik.png'
+ # encoding = 'iso-8859-2'  (left disabled)
+
+ oldest_article = 1
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ remove_empty_feeds = True
+ remove_javascript = True
+ no_stylesheets = True
+ extra_css = """
+ """
+
+ feeds = [
+     ('Z domova', 'http://www.denik.cz/rss/z_domova.html'),
+     ('Pražský deník - Moje Praha', 'http://prazsky.denik.cz/rss/zpravy_region.html'),
+     # ('Zahraničí', 'http://www.denik.cz/rss/ze_sveta.html'),
+     # ('Kultura', 'http://www.denik.cz/rss/kultura.html'),
+ ]
+
+ keep_only_tags = [dict(name='div', attrs={'class': 'content'})]
+ remove_tags = []
+ # remove_tags_before = dict(name='h1')  (left disabled)
+ remove_tags_after = dict(name='p', attrs={'class': 'clanek-autor'})
+
+
diff --git a/recipes/denik_referendum.recipe b/recipes/denik_referendum.recipe
new file mode 100644
index 0000000000..e04871d067
--- /dev/null
+++ b/recipes/denik_referendum.recipe
@@ -0,0 +1,28 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class denikReferendumRecipe(BasicNewsRecipe):
+ # calibre news recipe for Deník Referendum (Czech), read from its FeedBurner feed.
+ title = 'Den\u00edk Referendum'
+ description = ''
+ publisher = ''
+ __author__ = 'bubak'
+ language = 'cs'
+ # encoding = 'iso-8859-2'  (left disabled)
+
+ feeds = [('Deník Referendum', 'http://feeds.feedburner.com/DenikReferendum')]
+
+ oldest_article = 1
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ remove_javascript = True
+ no_stylesheets = True
+ remove_attributes = []
+
+ keep_only_tags = [dict(name='div', attrs={'id': ['content']})]
+ remove_tags_after = dict(name='div', attrs={'class': ['text']})
+ remove_tags = [
+     dict(name='div', attrs={'class': ['box boxLine', 'box noprint', 'box']}),
+     dict(name='h3', attrs={'class': 'head alt'}),
+ ]
diff --git a/recipes/editoriali.recipe b/recipes/editoriali.recipe
index 1b0c558df4..c5596bd743 100644
--- a/recipes/editoriali.recipe
+++ b/recipes/editoriali.recipe
@@ -7,6 +7,7 @@ class AdvancedUserRecipe1332847053(BasicNewsRecipe):
title = u'Editoriali'
__author__ = 'faber1971'
description = 'Leading articles on Italy by the best Italian editorials'
+ language = 'it'
oldest_article = 1
max_articles_per_feed = 100
diff --git a/recipes/f1_ultra.recipe b/recipes/f1_ultra.recipe
new file mode 100644
index 0000000000..ada82542fc
--- /dev/null
+++ b/recipes/f1_ultra.recipe
@@ -0,0 +1,35 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class f1ultra(BasicNewsRecipe):  # calibre news recipe for the Polish Formula 1 site f1ultra.pl; the whole class is configuration
+ title = u'Formuła 1 - F1 ultra'
+ __license__ = 'GPL v3'  # NOTE(review): the __author__ literal on the next line is broken across two lines in this copy of the patch
+ __author__ = 'MrStefan
, Artur Stachecki '
+ language = 'pl'
+ description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
+ masthead_url='http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
+ remove_empty_feeds= True
+ oldest_article = 1  # presumably an age limit in days (calibre BasicNewsRecipe setting) - confirm
+ max_articles_per_feed = 100
+ remove_javascript=True
+ no_stylesheets=True
+
+ keep_only_tags =[(dict(name = 'div', attrs = {'id' : 'main'}))]
+ remove_tags_after =[dict(attrs = {'style' : 'margin-top:5px;margin-bottom:5px;display: inline;'})]
+ remove_tags =[(dict(attrs = {'class' : ['buttonheading', 'avPlayerContainer', 'createdate']}))]  # extended by the append() calls below
+ remove_tags.append(dict(attrs = {'title' : ['PDF', 'Drukuj', 'Email']}))
+ remove_tags.append(dict(name = 'form', attrs = {'method' : 'post'}))
+ remove_tags.append(dict(name = 'hr', attrs = {'size' : '2'}))
+
+ preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''),  # each pair is (pattern, replacement function); every replacement here returns ''
+ (re.compile(r'align="right"'), lambda match: ''),
+ (re.compile(r'width=\"*\"'), lambda match: ''),  # NOTE(review): as written this matches width= followed by one or more double quotes, not a quoted value; the entry on the next line is cut off in this copy
+ (re.compile(r'\'), lambda match: '')]
+
+
+ extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
+ img { display: block; clear: both;}
+ '''
+ remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']
+
+ feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]  # (section title, feed URL) pairs
diff --git a/recipes/foreign_policy.recipe b/recipes/foreign_policy.recipe
index 893d055a05..4ddecf842f 100644
--- a/recipes/foreign_policy.recipe
+++ b/recipes/foreign_policy.recipe
@@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1349086293(BasicNewsRecipe):
title = u'Foreign Policy'
+ language = 'en'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
diff --git a/recipes/gazeta_pl_krakow.recipe b/recipes/gazeta_pl_krakow.recipe
index 0f35e536f6..59188a5d6a 100644
--- a/recipes/gazeta_pl_krakow.recipe
+++ b/recipes/gazeta_pl_krakow.recipe
@@ -8,7 +8,6 @@ krakow.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
-import re
class gw_krakow(BasicNewsRecipe):
title = u'Gazeta.pl Kraków'
@@ -46,7 +45,7 @@ class gw_krakow(BasicNewsRecipe):
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}))
remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]
-
+
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
def skip_ad_pages(self, soup):
diff --git a/recipes/gazeta_pl_warszawa.recipe b/recipes/gazeta_pl_warszawa.recipe
index 7a43931db4..2d95bcc06f 100644
--- a/recipes/gazeta_pl_warszawa.recipe
+++ b/recipes/gazeta_pl_warszawa.recipe
@@ -8,7 +8,6 @@ warszawa.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
-import re
class gw_wawa(BasicNewsRecipe):
title = u'Gazeta.pl Warszawa'
@@ -43,7 +42,7 @@ class gw_wawa(BasicNewsRecipe):
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
-
+
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
def skip_ad_pages(self, soup):
diff --git a/recipes/icons/antyweb.png b/recipes/icons/antyweb.png
new file mode 100644
index 0000000000..8ca9870f60
Binary files /dev/null and b/recipes/icons/antyweb.png differ
diff --git a/recipes/icons/bankier_pl.png b/recipes/icons/bankier_pl.png
new file mode 100644
index 0000000000..c26f006a57
Binary files /dev/null and b/recipes/icons/bankier_pl.png differ
diff --git a/recipes/icons/business_standard.png b/recipes/icons/business_standard.png
index 1edff420c0..f4c04e566a 100644
Binary files a/recipes/icons/business_standard.png and b/recipes/icons/business_standard.png differ
diff --git a/recipes/icons/f1_ultra.png b/recipes/icons/f1_ultra.png
new file mode 100644
index 0000000000..f45a94f53a
Binary files /dev/null and b/recipes/icons/f1_ultra.png differ
diff --git a/recipes/icons/myapple_pl.png b/recipes/icons/myapple_pl.png
new file mode 100644
index 0000000000..a68cf4e7ef
Binary files /dev/null and b/recipes/icons/myapple_pl.png differ
diff --git a/recipes/icons/telepolis_pl.png b/recipes/icons/telepolis_pl.png
new file mode 100644
index 0000000000..0b94658d94
Binary files /dev/null and b/recipes/icons/telepolis_pl.png differ
diff --git a/recipes/ihned.cz.recipe b/recipes/ihned.cz.recipe
new file mode 100644
index 0000000000..a35be06dd1
--- /dev/null
+++ b/recipes/ihned.cz.recipe
@@ -0,0 +1,36 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ihnedRecipe(BasicNewsRecipe):
+ # calibre news recipe for iHNed.cz (Czech): five section feeds; the whole class is configuration.
+ title = 'iHNed.cz'
+ description = 'ihned.cz'
+ publisher = ''
+ __author__ = 'bubak'
+ language = 'cs'
+ cover_url = 'http://rss.ihned.cz/img/0/0_hp09/ihned.cz.gif'
+ # encoding = 'iso-8859-2'  (left disabled)
+ oldest_article = 1
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ remove_javascript = True
+ no_stylesheets = True
+ remove_attributes = []
+ extra_css = """
+ """
+
+ feeds = [
+     ('Zprávy', 'http://zpravy.ihned.cz/?m=rss'),
+     ('Hospodářské noviny', 'http://hn.ihned.cz/?p=500000_rss'),
+     ('Byznys', 'http://byznys.ihned.cz/?m=rss'),
+     ('Life', 'http://life.ihned.cz/?m=rss'),
+     ('Dialog', 'http://dialog.ihned.cz/?m=rss'),
+ ]
+ remove_tags_before = dict(name='div', attrs={'id': ['heading']})
+ remove_tags_after = dict(name='div', attrs={'id': ['next-authors']})
+ remove_tags = [
+     dict(name='ul', attrs={'id': ['comm']}),
+     dict(name='div', attrs={'id': ['r-big']}),
+     dict(name='div', attrs={'class': ['tools tools-top']}),
+ ]
diff --git a/recipes/insider.recipe b/recipes/insider.recipe
new file mode 100644
index 0000000000..faaf00a14a
--- /dev/null
+++ b/recipes/insider.recipe
@@ -0,0 +1,59 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class insider(BasicNewsRecipe):
+ __author__ = 'bubak'
+ title = 'Insider'
+ language = 'cs'
+
+ remove_tags = [dict(name='div', attrs={'class':'article-related-content'})
+ ,dict(name='div', attrs={'class':'calendar'})
+ ,dict(name='span', attrs={'id':'labelHolder'})
+ ]
+
+ no_stylesheets = True
+ keep_only_tags = [dict(name='div', attrs={'class':['doubleBlock textContentFormat']})]
+
+ preprocess_regexps = [(re.compile(r'T.mata:.*', re.DOTALL|re.IGNORECASE), lambda m: '