')]
+
+ keep_only_tags = []
+
+ visited_urls = {}
+ def get_article_url(self, article):
+     """Return the article URL, or None when it is missing or already seen."""
+     url = BasicNewsRecipe.get_article_url(self, article)
+     if url is None or url in self.visited_urls:  # None would break the string concatenation below
+         self.log.debug('Ignoring duplicate: %s' % (url,))
+         return None
+     self.visited_urls[url] = True
+     self.log.debug('Accepting: ' + url)
+     return url
+
+ def encoding(self, source):
+     """Decode source as UTF-8 for 'blog.aktualne' URLs, else ISO-8859-2 (bad bytes replaced)."""
+     address = source.newurl
+     is_blog = address.find('blog.aktualne') != -1
+     charset = 'utf-8' if is_blog else 'iso-8859-2'
+     self.log.debug('Called encoding ' + charset + " " + str(address))
+     return source.decode(charset, 'replace')
+
diff --git a/recipes/blesk.recipe b/recipes/blesk.recipe
new file mode 100644
index 0000000000..7eff4c42d0
--- /dev/null
+++ b/recipes/blesk.recipe
@@ -0,0 +1,55 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
+
+class bleskRecipe(BasicNewsRecipe):
+    """Calibre news recipe for the blesk.cz RSS feeds listed in ``feeds``."""
+    __author__ = 'bubak'
+    title = u'Blesk'
+    publisher = u''
+    description = 'blesk.cz'
+    oldest_article = 1
+    max_articles_per_feed = 20
+    use_embedded_content = False
+
+    feeds = [
+        (u'Zprávy', u'http://www.blesk.cz/rss/7'),
+        (u'Blesk', u'http://www.blesk.cz/rss/1'),
+        (u'Sex a tabu', u'http://www.blesk.cz/rss/2'),
+        (u'Celebrity', u'http://www.blesk.cz/rss/5'),
+        (u'Cestování', u'http://www.blesk.cz/rss/12')
+    ]
+
+    #encoding = 'iso-8859-2'
+    language = 'cs'
+    cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = """
+ """
+    remove_attributes = []
+    remove_tags_before = dict(name='div', attrs={'id':['boxContent']})
+    remove_tags_after = dict(name='div', attrs={'class':['artAuthors']})
+    remove_tags = [dict(name='div', attrs={'class':['link_clanek']}),
+                   dict(name='div', attrs={'id':['partHeader']}),
+                   dict(name='div', attrs={'id':['top_bottom_box', 'lista_top']})]
+    # NOTE(review): the pattern below appears truncated in this patch - restore it from the recipe file.
+    preprocess_regexps = [(re.compile(r'
')]
+
+    keep_only_tags = [dict(name='div', attrs={'class':'articleContent'})]
+
+    visited_urls = {}  # URLs already accepted by get_article_url (dict used as a set)
+    def get_article_url(self, article):
+        """Return the article URL, or None when it is missing or already seen."""
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if url is None or url in self.visited_urls:  # None would break the concatenation below
+            self.log.debug('Ignoring duplicate: %s' % (url,))
+            return None
+        self.visited_urls[url] = True
+        self.log.debug('Accepting: ' + url)
+        return url
+
+
+
+
diff --git a/recipes/buchreport.recipe b/recipes/buchreport.recipe
index 68d98d0622..5ed34d1ee8 100644
--- a/recipes/buchreport.recipe
+++ b/recipes/buchreport.recipe
@@ -11,7 +11,7 @@ class Buchreport(BasicNewsRecipe) :
title = u'Buchreport'
timefmt = ' [%d.%m.%Y]'
encoding = 'cp1252'
- language = 'de_DE'
+ language = 'de'
extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \
diff --git a/recipes/ceska_pozice.recipe b/recipes/ceska_pozice.recipe
new file mode 100644
index 0000000000..478f6823b9
--- /dev/null
+++ b/recipes/ceska_pozice.recipe
@@ -0,0 +1,68 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ceskaPoziceRecipe(BasicNewsRecipe):
+    """Calibre news recipe for ceskapozice.cz that also stitches multi-page articles together."""
+    __author__ = 'bubak'
+    title = u'Česká pozice'
+    description = 'Česká pozice'
+    oldest_article = 2
+    max_articles_per_feed = 20
+
+    feeds = [
+        (u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'),
+        (u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
+        (u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'),
+        (u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed')
+    ]
+
+    language = 'cs'
+    cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
+    remove_javascript = True
+    no_stylesheets = True
+    domain = u'http://www.ceskapozice.cz'  # prefixed to the pager's href in append_page
+    use_embedded_content = False
+
+    remove_tags = [dict(name='div', attrs={'class':['block-ad', 'region region-content-ad']}),
+                   dict(name='ul', attrs={'class':'links'}),
+                   dict(name='div', attrs={'id':['comments', 'back-to-top']}),
+                   dict(name='div', attrs={'class':['next-page', 'region region-content-ad']}),
+                   dict(name='cite')]
+
+    keep_only_tags = [dict(name='div', attrs={'id':'content'})]
+
+    visited_urls = {}  # URLs already accepted by get_article_url (dict used as a set)
+    def get_article_url(self, article):
+        """Return the article URL, or None when it is missing or already seen."""
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if url is None or url in self.visited_urls:  # None would break the concatenation below
+            self.log.debug('Ignoring duplicate: %s' % (url,))
+            return None
+        self.visited_urls[url] = True
+        self.log.debug('Accepting: ' + url)
+        return url
+
+    def preprocess_html(self, soup):
+        self.append_page(soup, soup.body, 3)  # splice follow-up pages into the body at index 3
+        return soup
+
+    def append_page(self, soup, appendtag, position):
+        """Recursively insert the 'main-body' of each following page into appendtag at position."""
+        pager = soup.find('div', attrs={'class':'paging-bottom'})
+        if not pager:
+            return
+        nextbutton = pager.find('li', attrs={'class':'pager-next'})
+        if nextbutton and nextbutton.a:  # a pager entry without a link has nothing to follow
+            soup2 = self.index_to_soup(self.domain + nextbutton.a['href'])
+            texttag = soup2.find('div', attrs={'class':'main-body'})
+            if texttag:  # skip a next page that lacks the expected container instead of crashing
+                for it in texttag.findAll('div', attrs={'class':'region region-content-ad'}):
+                    it.extract()
+                for it in texttag.findAll('cite'):
+                    it.extract()
+                self.append_page(soup2, texttag, len(texttag.contents))
+                texttag.extract()
+                appendtag.insert(position, texttag)
+        pager.extract()
+
diff --git a/recipes/ceske_noviny.recipe b/recipes/ceske_noviny.recipe
new file mode 100644
index 0000000000..10dd16689d
--- /dev/null
+++ b/recipes/ceske_noviny.recipe
@@ -0,0 +1,30 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ceskenovinyRecipe(BasicNewsRecipe):
+    """Calibre news recipe for ceskenoviny.cz; only the first feed below is enabled."""
+    __author__ = 'bubak'
+    title = u'České Noviny'
+    description = 'ceskenoviny.cz'
+    oldest_article = 1
+    max_articles_per_feed = 20
+
+    feeds = [
+        (u'Domácí', u'http://www.ceskenoviny.cz/sluzby/rss/domov.php'),
+        # (u'Hlavní události', u'http://www.ceskenoviny.cz/sluzby/rss/index.php'),
+        # (u'Přehled zpráv', u'http://www.ceskenoviny.cz/sluzby/rss/zpravy.php'),
+        # (u'Ze světa', u'http://www.ceskenoviny.cz/sluzby/rss/svet.php'),
+        # (u'Kultura', u'http://www.ceskenoviny.cz/sluzby/rss/kultura.php'),
+        # (u'IT', u'http://www.ceskenoviny.cz/sluzby/rss/pocitace.php'),
+    ]
+
+    language = 'cs'
+    cover_url = 'http://i4.cn.cz/grafika/cn_logo-print.gif'
+    remove_javascript = True
+    no_stylesheets = True
+
+    remove_attributes = []
+    filter_regexps = [r'img.aktualne.centrum.cz']
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'clnk'})]
diff --git a/recipes/cesky_rozhlas_6.recipe b/recipes/cesky_rozhlas_6.recipe
new file mode 100644
index 0000000000..eca32af02c
--- /dev/null
+++ b/recipes/cesky_rozhlas_6.recipe
@@ -0,0 +1,26 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class cro6Recipe(BasicNewsRecipe):
+    """Calibre news recipe for the Český rozhlas 6 export feed on rozhlas.cz."""
+    __author__ = 'bubak'
+    title = u'Český rozhlas 6'
+    description = 'Český rozhlas 6'
+    oldest_article = 1
+    max_articles_per_feed = 20
+
+    feeds = [
+        (u'Český rozhlas 6', u'http://www.rozhlas.cz/export/cro6/'),
+    ]
+
+    language = 'cs'
+    cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'
+    remove_javascript = True
+    no_stylesheets = True
+
+    remove_attributes = []
+    remove_tags = [dict(name='div', attrs={'class': ['audio-play-all', 'poradHeaders', 'actions']}),
+                   dict(name='p', attrs={'class': ['para-last']})]
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
diff --git a/recipes/demagog.cz.recipe b/recipes/demagog.cz.recipe
new file mode 100644
index 0000000000..7d89af41bd
--- /dev/null
+++ b/recipes/demagog.cz.recipe
@@ -0,0 +1,39 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import re
+
+class demagogRecipe(BasicNewsRecipe):
+ """Calibre news recipe for the demagog.cz 'Aktuality' RSS feed."""
+ __author__ = 'bubak'
+ title = u'Demagog.cz'
+ publisher = u''
+ description = 'demagog.cz'
+ oldest_article = 6
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ remove_empty_feeds = True
+
+ feeds = [
+ (u'Aktuality', u'http://demagog.cz/rss')
+ ]
+
+ #encoding = 'iso-8859-2'
+ language = 'cs'
+ cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
+ remove_javascript = True
+ no_stylesheets = True
+ extra_css = """
+ .vyrok_suhrn{margin-top:50px; }
+ .vyrok{margin-bottom:30px; }
+ """
+ remove_tags = [dict(name='a', attrs={'class':'vyrok_odovodnenie_tgl'}),
+ dict(name='img', attrs={'class':'vyrok_fotografia'})]
+ remove_tags_before = dict(name='h1')
+ remove_tags_after = dict(name='div', attrs={'class':'vyrok_text_after'})
+ # NOTE(review): pattern/replacement below look truncated in this patch; '\1' in a non-raw string is U+0001, not a group reference - confirm.
+ preprocess_regexps = [(re.compile(r'(
)', re.DOTALL|re.IGNORECASE), lambda match: '\1
')]
+
+
+
+
diff --git a/recipes/denik.cz.recipe b/recipes/denik.cz.recipe
new file mode 100644
index 0000000000..2ccf8caa40
--- /dev/null
+++ b/recipes/denik.cz.recipe
@@ -0,0 +1,36 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ceskyDenikRecipe(BasicNewsRecipe):
+    """Calibre news recipe for denik.cz; two feeds are enabled, the rest are commented out."""
+    __author__ = 'bubak'
+    title = u'denik.cz'
+    publisher = u''
+    description = u'Český deník'
+    oldest_article = 1
+    max_articles_per_feed = 20
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    feeds = [
+        (u'Z domova', u'http://www.denik.cz/rss/z_domova.html'),
+        (u'Pražský deník - Moje Praha', u'http://prazsky.denik.cz/rss/zpravy_region.html'),
+        # (u'Zahraničí', u'http://www.denik.cz/rss/ze_sveta.html'),
+        # (u'Kultura', u'http://www.denik.cz/rss/kultura.html'),
+    ]
+
+    #encoding = 'iso-8859-2'
+    language = 'cs'
+    cover_url = 'http://g.denik.cz/images/loga/denik.png'
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = """
+ """
+
+    remove_tags = []
+    keep_only_tags = [dict(name='div', attrs={'class': 'content'})]
+    #remove_tags_before = dict(name='h1')
+    remove_tags_after = dict(name='p', attrs={'class': 'clanek-autor'})
+
+
diff --git a/recipes/denik_referendum.recipe b/recipes/denik_referendum.recipe
new file mode 100644
index 0000000000..e04871d067
--- /dev/null
+++ b/recipes/denik_referendum.recipe
@@ -0,0 +1,28 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class denikReferendumRecipe(BasicNewsRecipe):
+    """Calibre news recipe for the Deník Referendum FeedBurner feed."""
+    __author__ = 'bubak'
+    title = u'Den\u00edk Referendum'
+    publisher = u''
+    description = ''
+    oldest_article = 1
+    max_articles_per_feed = 20
+
+    feeds = [
+        (u'Deník Referendum', u'http://feeds.feedburner.com/DenikReferendum'),
+    ]
+
+    #encoding = 'iso-8859-2'
+    language = 'cs'
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_attributes = []
+    remove_tags_after = dict(name='div', attrs={'class': ['text']})
+    remove_tags = [dict(name='div', attrs={'class': ['box boxLine', 'box noprint', 'box']}),
+                   dict(name='h3', attrs={'class': 'head alt'})]
+
+    keep_only_tags = [dict(name='div', attrs={'id': ['content']})]
diff --git a/recipes/editoriali.recipe b/recipes/editoriali.recipe
index 1b0c558df4..c5596bd743 100644
--- a/recipes/editoriali.recipe
+++ b/recipes/editoriali.recipe
@@ -7,6 +7,7 @@ class AdvancedUserRecipe1332847053(BasicNewsRecipe):
title = u'Editoriali'
__author__ = 'faber1971'
description = 'Leading articles on Italy by the best Italian editorials'
+ language = 'it'
oldest_article = 1
max_articles_per_feed = 100
diff --git a/recipes/foreign_policy.recipe b/recipes/foreign_policy.recipe
index 893d055a05..4ddecf842f 100644
--- a/recipes/foreign_policy.recipe
+++ b/recipes/foreign_policy.recipe
@@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1349086293(BasicNewsRecipe):
title = u'Foreign Policy'
+ language = 'en'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
diff --git a/recipes/ihned.cz.recipe b/recipes/ihned.cz.recipe
new file mode 100644
index 0000000000..a35be06dd1
--- /dev/null
+++ b/recipes/ihned.cz.recipe
@@ -0,0 +1,36 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ihnedRecipe(BasicNewsRecipe):
+    """Calibre news recipe for the ihned.cz RSS feeds listed in ``feeds``."""
+    __author__ = 'bubak'
+    title = u'iHNed.cz'
+    publisher = u''
+    description = 'ihned.cz'
+    oldest_article = 1
+    max_articles_per_feed = 20
+    use_embedded_content = False
+
+    feeds = [
+        (u'Zprávy', u'http://zpravy.ihned.cz/?m=rss'),
+        (u'Hospodářské noviny', u'http://hn.ihned.cz/?p=500000_rss'),
+        (u'Byznys', u'http://byznys.ihned.cz/?m=rss'),
+        (u'Life', u'http://life.ihned.cz/?m=rss'),
+        (u'Dialog', u'http://dialog.ihned.cz/?m=rss'),
+    ]
+
+    #encoding = 'iso-8859-2'
+    language = 'cs'
+    cover_url = 'http://rss.ihned.cz/img/0/0_hp09/ihned.cz.gif'
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = """
+ """
+
+    remove_attributes = []
+    remove_tags_before = dict(name='div', attrs={'id': ['heading']})
+    remove_tags_after = dict(name='div', attrs={'id': ['next-authors']})
+    remove_tags = [dict(name='ul', attrs={'id': ['comm']}),
+                   dict(name='div', attrs={'id': ['r-big']}),
+                   dict(name='div', attrs={'class': ['tools tools-top']})]
diff --git a/recipes/insider.recipe b/recipes/insider.recipe
new file mode 100644
index 0000000000..faaf00a14a
--- /dev/null
+++ b/recipes/insider.recipe
@@ -0,0 +1,59 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import unicode_literals
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class insider(BasicNewsRecipe):
+ __author__ = 'bubak'
+ title = 'Insider'
+ language = 'cs'
+
+ remove_tags = [dict(name='div', attrs={'class':'article-related-content'})
+ ,dict(name='div', attrs={'class':'calendar'})
+ ,dict(name='span', attrs={'id':'labelHolder'})
+ ]
+
+ no_stylesheets = True
+ keep_only_tags = [dict(name='div', attrs={'class':['doubleBlock textContentFormat']})]
+
+ preprocess_regexps = [(re.compile(r'T.mata:.*', re.DOTALL|re.IGNORECASE), lambda m: '