diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
index dd47af946a..00b4a8753e 100644
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@@ -10,15 +10,15 @@ class Adventure_zone(BasicNewsRecipe):
oldest_article = 20
max_articles_per_feed = 100
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
- index='http://www.adventure-zone.info/fusion/'
+ index = 'http://www.adventure-zone.info/fusion/'
use_embedded_content = False
preprocess_regexps = [(re.compile(r"
Komentarze | ", re.IGNORECASE), lambda m: ''),
(re.compile(r'?table.*?>'), lambda match: ''),
(re.compile(r'?tbody.*?>'), lambda match: '')]
- remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
- remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
- remove_tags_after= dict(id='comments')
- extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
+ remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
+ remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
+ remove_tags_after = dict(id='comments')
+ extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
'''def get_cover_url(self):
@@ -26,7 +26,7 @@ class Adventure_zone(BasicNewsRecipe):
cover=soup.find(id='box_OstatninumerAZ')
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
return getattr(self, 'cover_url', self.cover_url)'''
-
+
def populate_article_metadata(self, article, soup, first):
result = re.search('(.+) - Adventure Zone', soup.title.string)
if result:
@@ -66,5 +66,4 @@ class Adventure_zone(BasicNewsRecipe):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup
-
-
+
diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe
index 745ade420c..11a56ec6b5 100644
--- a/recipes/astroflesz.recipe
+++ b/recipes/astroflesz.recipe
@@ -18,3 +18,10 @@ class Astroflesz(BasicNewsRecipe):
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
+
+    def postprocess_html(self, soup, first_fetch):
+        intro = soup.find(attrs={'class':'itemIntroText'})  # intro block; find() yields nothing when it is absent
+        images = intro.findAll('img') if intro else []
+        for img in images:
+            img['style'] = 'float: left; margin-right: 5px;'  # inline style: float the image left with a 5px gap
+        return soup
diff --git a/recipes/ciekawostki_historyczne.recipe b/recipes/ciekawostki_historyczne.recipe
index b45f28e4ba..42ea94fa1d 100644
--- a/recipes/ciekawostki_historyczne.recipe
+++ b/recipes/ciekawostki_historyczne.recipe
@@ -11,7 +11,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
max_articles_per_feed = 100
- oldest_article = 140000
+ extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
+ oldest_article = 12
preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz też:
.*?', re.DOTALL), lambda match: '')]
no_stylesheets = True
remove_empty_feeds = True
diff --git a/recipes/conowego_pl.recipe b/recipes/conowego_pl.recipe
index 9b2f6e8200..7e6549e713 100644
--- a/recipes/conowego_pl.recipe
+++ b/recipes/conowego_pl.recipe
@@ -11,6 +11,7 @@ class CoNowegoPl(BasicNewsRecipe):
oldest_article = 7
max_articles_per_feed = 100
INDEX = 'http://www.conowego.pl/'
+ extra_css = '.news-single-img {float:left; margin-right:5px;}'
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
@@ -35,7 +36,7 @@ class CoNowegoPl(BasicNewsRecipe):
pagetext = soup2.find(attrs={'class':'ni_content'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
-
+
comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
diff --git a/recipes/czas_gentlemanow.recipe b/recipes/czas_gentlemanow.recipe
index 009cc7e9dd..d9b6ab78c7 100644
--- a/recipes/czas_gentlemanow.recipe
+++ b/recipes/czas_gentlemanow.recipe
@@ -12,11 +12,13 @@ class CzasGentlemanow(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
oldest_article = 7
max_articles_per_feed = 100
+ extra_css = '.gallery-item {float:left; margin-right: 10px; max-width: 20%;} .alignright {text-align: right; float:right; margin-left:5px;}\
+ .wp-caption-text {text-align: left;} img.aligncenter {display: block; margin-left: auto; margin-right: auto;} .alignleft {float: left; margin-right:5px;}'
no_stylesheets = True
remove_empty_feeds = True
preprocess_regexps = [(re.compile(u'Może Cię też zainteresować:
'), lambda m: '')]
use_embedded_content = False
keep_only_tags = [dict(name='div', attrs={'class':'content'})]
- remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])]
+ remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails', 'respond'])]
remove_tags_after = dict(id='comments')
feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')]
diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe
index 708bdbb017..f37059becf 100644
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@@ -16,6 +16,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
extra_css = '.title {font-size:22px;}'
oldest_article = 8
max_articles_per_feed = 100
+    remove_attributes = ['style', 'width', 'height']  # attribute names to strip from fetched HTML tags
preprocess_regexps = [(re.compile(ur'Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...
'), lambda match: '') ]
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
@@ -28,4 +29,11 @@ class Dobreprogramy_pl(BasicNewsRecipe):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
+ for r in soup.findAll('iframe'):
+ r.parent.extract()
return soup
+    def postprocess_html(self, soup, first_fetch):  # final clean-up: drop <span> tags left without text
+        for r in soup.findAll('span', text=''):
+            if not r.string:  # NOTE(review): .string is presumably also None for spans with several children - confirm those should be removed too
+                r.extract()
+        return soup
diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe
index 50de40354c..cdd0630891 100644
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@@ -9,6 +9,7 @@ class Dzieje(BasicNewsRecipe):
category = 'history'
language = 'pl'
ignore_duplicate_articles = {'title', 'url'}
+ extra_css = '.imagecache-default {float:left; margin-right:20px;}'
index = 'http://dzieje.pl'
oldest_article = 8
max_articles_per_feed = 100
diff --git a/recipes/ekologia_pl.recipe b/recipes/ekologia_pl.recipe
index 21d3b607d2..e925ebad6f 100644
--- a/recipes/ekologia_pl.recipe
+++ b/recipes/ekologia_pl.recipe
@@ -9,7 +9,7 @@ class EkologiaPl(BasicNewsRecipe):
language = 'pl'
cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
ignore_duplicate_articles = {'title', 'url'}
- extra_css = '.title {font-size: 200%;}'
+ extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
diff --git a/recipes/film_org_pl.recipe b/recipes/film_org_pl.recipe
index 442e273b1b..fa0a69912b 100644
--- a/recipes/film_org_pl.recipe
+++ b/recipes/film_org_pl.recipe
@@ -7,6 +7,7 @@ class FilmOrgPl(BasicNewsRecipe):
description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
category = 'film'
language = 'pl'
+ extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}'
cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
ignore_duplicate_articles = {'title', 'url'}
oldest_article = 7
diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe
index 3a86438d1c..780dcbe9b2 100644
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@@ -10,7 +10,6 @@ class FilmWebPl(BasicNewsRecipe):
category = 'movies'
language = 'pl'
index = 'http://www.filmweb.pl'
- #extra_css = '.MarkupPhotoHTML-7 {float:left; margin-right: 10px;}'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets = True
@@ -19,9 +18,9 @@ class FilmWebPl(BasicNewsRecipe):
remove_javascript = True
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), (re.compile(ur'(
\s*?
\s*?)+', re.IGNORECASE), lambda m: '
')]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
- remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
+ #remove_tags = [dict()]
remove_attributes = ['style',]
- keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
+ keep_only_tags = [dict(attrs={'class':['hdr hdr-super', 'newsContent']})]
feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
(u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
@@ -44,12 +43,12 @@ class FilmWebPl(BasicNewsRecipe):
skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'})
if skip_tag is not None:
return self.index_to_soup(skip_tag['href'], raw=True)
-
+
def postprocess_html(self, soup, first_fetch):
for r in soup.findAll(attrs={'class':'singlephoto'}):
r['style'] = 'float:left; margin-right: 10px;'
return soup
-
+
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
@@ -59,11 +58,6 @@ class FilmWebPl(BasicNewsRecipe):
for i in soup.findAll('sup'):
if not i.string or i.string.startswith('(kliknij'):
i.extract()
- tag = soup.find(name='ul', attrs={'class':'inline sep-line'})
- if tag:
- tag.name = 'div'
- for t in tag.findAll('li'):
- t.name = 'div'
for r in soup.findAll(id=re.compile('photo-\d+')):
r.extract()
for r in soup.findAll(style=re.compile('float: ?left')):
diff --git a/recipes/niebezpiecznik.recipe b/recipes/niebezpiecznik.recipe
index 2228ef5122..3b321772ec 100644
--- a/recipes/niebezpiecznik.recipe
+++ b/recipes/niebezpiecznik.recipe
@@ -9,8 +9,9 @@ class Niebezpiecznik_pl(BasicNewsRecipe):
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets = True
+ remove_empty_feeds = True
cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})]
feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
- ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
+ ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
diff --git a/recipes/wirtualnemedia_pl.recipe b/recipes/wirtualnemedia_pl.recipe
index 018891c243..155cafbec2 100644
--- a/recipes/wirtualnemedia_pl.recipe
+++ b/recipes/wirtualnemedia_pl.recipe
@@ -8,23 +8,24 @@ class WirtualneMedia(BasicNewsRecipe):
use_embedded_content = False
remove_empty_feeds = True
__author__ = 'fenuks'
+ extra_css = '.thumbnail {float:left; max-width:150px; margin-right:5px;}'
description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
category = 'internet'
language = 'pl'
masthead_url= 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
cover_url= 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'
remove_tags=[dict(id=['header', 'footer'])]
- feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
- (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
- (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
- (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
- (u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'),
- (u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'),
- (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
- (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
- (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
- (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')
- ]
+ feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
+ (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
+ (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
+ (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
+ (u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'),
+ (u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'),
+ (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
+ (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
+ (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
+ (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')
+ ]
def print_version(self, url):
- return url.replace('artykul', 'print')
\ No newline at end of file
+ return url.replace('artykul', 'print')
diff --git a/recipes/zaufana_trzecia_strona.recipe b/recipes/zaufana_trzecia_strona.recipe
index 13e7d98cce..14f2092d18 100644
--- a/recipes/zaufana_trzecia_strona.recipe
+++ b/recipes/zaufana_trzecia_strona.recipe
@@ -1,5 +1,6 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from calibre.web.feeds.news import BasicNewsRecipe
+
class ZTS(BasicNewsRecipe):
title = u'Zaufana Trzecia Strona'
__author__ = 'fenuks'
@@ -7,6 +8,7 @@ class ZTS(BasicNewsRecipe):
category = 'IT, security'
language = 'pl'
cover_url = 'http://www.zaufanatrzeciastrona.pl/wp-content/uploads/2012/08/z3s_h100.png'
+ extra_css = '.thumbnail {float: left; margin-right:5px;}'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True