diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
index 366b1ccf5a..65f4e3e52d 100644
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@@ -1,19 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone'
__author__ = 'fenuks'
description = 'Adventure zone - adventure games from A to Z'
category = 'games'
language = 'pl'
- oldest_article = 15
- max_articles_per_feed = 100
no_stylesheets = True
+ oldest_article = 20
+ max_articles_per_feed = 100
+ use_embedded_content=False
+    preprocess_regexps = [(re.compile(r"Komentarze | ", re.IGNORECASE), lambda m: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
- remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
+ remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
+ remove_tags_after= dict(id='comments')
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
+ def parse_feeds (self):
+ feeds = BasicNewsRecipe.parse_feeds(self)
+ soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
+ tag=soup.find(name='channel')
+ titles=[]
+ for r in tag.findAll(name='image'):
+ r.extract()
+ art=tag.findAll(name='item')
+ for i in art:
+ titles.append(i.title.string)
+ for feed in feeds:
+ for article in feed.articles[:]:
+ article.title=titles[feed.articles.index(article)]
+ return feeds
+
+
def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
@@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
def skip_ad_pages(self, soup):
- skip_tag = soup.body.findAll(name='a')
- if skip_tag is not None:
- for r in skip_tag:
- if 'articles.php?' in r['href']:
- if r.strong is not None:
- word=r.strong.string
- if ('zapowied' or 'recenzj') in word:
- return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
- else:
- None
-
- def print_version(self, url):
- return url.replace('news.php?readmore', 'print.php?type=N&item_id')
-
+ skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
+ skip_tag = skip_tag.findAll(name='a')
+ for r in skip_tag:
+ if r.strong:
+ word=r.strong.string
+ if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
+ return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
\ No newline at end of file
diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe
index e5561fc98d..2808fed6e1 100644
--- a/recipes/astro_news_pl.recipe
+++ b/recipes/astro_news_pl.recipe
@@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
-
class AstroNEWS(BasicNewsRecipe):
title = u'AstroNEWS'
__author__ = 'fenuks'
@@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
- auto_cleanup = True
+ #extra_css= 'table {text-align: left;}'
+ no_stylesheets=True
cover_url='http://news.astronet.pl/img/logo_news.jpg'
- # no_stylesheets= True
+ remove_tags=[dict(name='hr')]
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
def print_version(self, url):
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
+ def preprocess_html(self, soup):
+ for item in soup.findAll(align=True):
+ del item['align']
+ return soup
diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe
index d63af135bc..7ff61a8a11 100644
--- a/recipes/focus_pl.recipe
+++ b/recipes/focus_pl.recipe
@@ -12,8 +12,9 @@ class Focus_pl(BasicNewsRecipe):
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
- remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
- remove_tags_after=dict(name='div', attrs={'class':'clear'})
+ #remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
+ #remove_tags_after=dict(name='div', attrs={'class':'clear'})
+ keep_only_tags=[dict(name='div', attrs={'class':['h2 h2f', 'news-left', 'news-right']})]
feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
(u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
(u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
@@ -23,35 +24,33 @@ class Focus_pl(BasicNewsRecipe):
(u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
(u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
(u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
-
-
-
-]
+ ]
def skip_ad_pages(self, soup):
- tag=soup.find(name='a')
- if tag:
- new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
- return new_soup
+ if 'Advertisement' in soup.title:
+ tag=soup.find(name='a')
+ if tag:
+ new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
+ return new_soup
def append_page(self, appendtag):
- tag=appendtag.find(name='div', attrs={'class':'arrows'})
- if tag:
- nexturl='http://www.focus.pl/'+tag.a['href']
- for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
- rem.extract()
- while nexturl:
- soup2=self.index_to_soup(nexturl)
- nexturl=None
- pagetext=soup2.find(name='div', attrs={'class':'txt'})
- tag=pagetext.find(name='div', attrs={'class':'arrows'})
- for r in tag.findAll(name='a'):
- if u'Następne' in r.string:
- nexturl='http://www.focus.pl/'+r['href']
- for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
- rem.extract()
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
+ tag=appendtag.find(name='div', attrs={'class':'arrows'})
+ if tag:
+ nexturl='http://www.focus.pl/'+tag.a['href']
+ for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
+ rem.extract()
+ while nexturl:
+ soup2=self.index_to_soup(nexturl)
+ nexturl=None
+ pagetext=soup2.find(name='div', attrs={'class':'txt'})
+ tag=pagetext.find(name='div', attrs={'class':'arrows'})
+ for r in tag.findAll(name='a'):
+ if u'Następne' in r.string:
+ nexturl='http://www.focus.pl/'+r['href']
+ for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
+ rem.extract()
+ pos = len(appendtag.contents)
+ appendtag.insert(pos, pagetext)
def get_cover_url(self):
soup=self.index_to_soup('http://www.focus.pl/magazyn/')
diff --git a/recipes/naczytniki.recipe b/recipes/naczytniki.recipe
index 374c6dd0cb..e4769d58bc 100644
--- a/recipes/naczytniki.recipe
+++ b/recipes/naczytniki.recipe
@@ -7,6 +7,7 @@ class naczytniki(BasicNewsRecipe):
language = 'pl'
description ='everything about e-readers'
category='readers'
+ no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe
index d8015105f8..74534f3346 100644
--- a/recipes/nowa_fantastyka.recipe
+++ b/recipes/nowa_fantastyka.recipe
@@ -1,20 +1,21 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
-
class Nowa_Fantastyka(BasicNewsRecipe):
title = u'Nowa Fantastyka'
oldest_article = 7
__author__ = 'fenuks'
language = 'pl'
+ encoding='latin2'
description ='site for fantasy readers'
category='fantasy'
max_articles_per_feed = 100
INDEX='http://www.fantastyka.pl/'
+ no_stylesheets=True
+ needs_subscription = 'optional'
remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
#remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
- remove_tags=[dict(attrs={'class':'avatar2'})]
- feeds = []
+ remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
def find_articles(self, url):
articles = []
@@ -45,3 +46,13 @@ class Nowa_Fantastyka(BasicNewsRecipe):
cover=soup.find(name='img', attrs={'class':'okladka'})
self.cover_url=self.INDEX+ cover['src']
return getattr(self, 'cover_url', self.cover_url)
+
+ def get_browser(self):
+ br = BasicNewsRecipe.get_browser()
+ if self.username is not None and self.password is not None:
+ br.open('http://www.fantastyka.pl/')
+ br.select_form(nr=0)
+ br['login'] = self.username
+ br['pass'] = self.password
+ br.submit()
+ return br
diff --git a/recipes/spiders_web_pl.recipe b/recipes/spiders_web_pl.recipe
index d615f01aa9..4fe7d9c8d6 100644
--- a/recipes/spiders_web_pl.recipe
+++ b/recipes/spiders_web_pl.recipe
@@ -8,8 +8,8 @@ class SpidersWeb(BasicNewsRecipe):
cover_url = 'http://www.spidersweb.pl/wp-content/themes/spiderweb/img/Logo.jpg'
category = 'IT, WEB'
language = 'pl'
+    no_stylesheets=True
max_articles_per_feed = 100
- remove_tags_before=dict(name="h1", attrs={'class':'Title'})
- remove_tags_after=dict(name="div", attrs={'class':'Text'})
- remove_tags=[dict(name='div', attrs={'class':['Tags', 'CommentCount FloatL', 'Show FloatL']})]
+ keep_only_tags=[dict(id='Post')]
+ remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']})]
feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]