diff --git a/recipes/android_com_pl.recipe b/recipes/android_com_pl.recipe
index a44d5e560a..c7a4a97d3c 100644
--- a/recipes/android_com_pl.recipe
+++ b/recipes/android_com_pl.recipe
@@ -6,6 +6,7 @@ class Android_com_pl(BasicNewsRecipe):
description = 'Android.com.pl - biggest polish Android site'
category = 'Android, mobile'
language = 'pl'
+ use_embedded_content=True
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
oldest_article = 8
max_articles_per_feed = 100
diff --git a/recipes/cgm_pl.recipe b/recipes/cgm_pl.recipe
index 673a9f940b..4ab4402c3a 100644
--- a/recipes/cgm_pl.recipe
+++ b/recipes/cgm_pl.recipe
@@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
class CGM(BasicNewsRecipe):
title = u'CGM'
@@ -17,9 +18,9 @@ class CGM(BasicNewsRecipe):
remove_tags_before=dict(id='mainContent')
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
- dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
- dict(id=['movieShare', 'container'])]
- feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
+ dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
+ dict(id=['movieShare', 'container'])]
+ feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
@@ -33,10 +34,12 @@ class CGM(BasicNewsRecipe):
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
gallery.contents[1].name='img'
gallery.contents[1]['src']=img
+ pos = len(gallery.contents)
+ gallery.insert(pos, BeautifulSoup('
'))
for item in soup.findAll(style=True):
del item['style']
ad=soup.findAll('a')
for r in ad:
- if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
+ if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
r.extract()
- return soup
\ No newline at end of file
+ return soup
diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe
index c2123cb8cf..55858020ad 100644
--- a/recipes/elektroda_pl.recipe
+++ b/recipes/elektroda_pl.recipe
@@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Elektroda(BasicNewsRecipe):
title = u'Elektroda'
@@ -13,3 +14,32 @@ class Elektroda(BasicNewsRecipe):
     remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
     remove_tags=[dict(name='a', attrs={'href':'#top'})]
     feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
+
+
+    def preprocess_html(self, soup):
+        """Append a parsed '<br />' fragment to the first ``span.postbody``.
+
+        The soup is modified in place and returned; if no such span is
+        found it is returned unchanged.
+        """
+        tag = soup.find('span', attrs={'class': 'postbody'})
+        if tag:
+            # Inserting at len(contents) puts the fragment after the last child.
+            tag.insert(len(tag.contents), BeautifulSoup('<br />'))
+        return soup
+
+    def parse_feeds(self):
+        """Parse the feeds and cut the leading '...:: ' part from each title.
+
+        Everything up to and including the first '::' plus the one character
+        after it is removed; titles without '::' are left unchanged.
+        """
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles:
+                sep = article.title.find('::')
+                # find() returns -1 when there is no separator; slicing
+                # unconditionally would then drop the first two characters.
+                if sep != -1:
+                    article.title = article.title[sep + 3:]
+        return feeds
diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe
index 0671deec6c..877d4472bc 100644
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@@ -13,7 +13,7 @@ class Filmweb_pl(BasicNewsRecipe):
remove_empty_feeds=True
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
- keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
+ keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe
index c8655dc9cd..07927796c0 100644
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@@ -9,12 +9,12 @@ class Gram_pl(BasicNewsRecipe):
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
- extra_css = 'h2 {font-style: italic; font-size:20px;}'
+ extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
- feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
- (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
+ feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
+ (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self)
@@ -23,3 +23,52 @@ class Gram_pl(BasicNewsRecipe):
                 if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
                     feed.articles.remove(article)
         return feeds
+
+    def append_page(self, soup, appendtag):
+        """Append the pages linked through ``a.cpn`` to *appendtag*.
+
+        Starting from the ``a.cpn`` link inside *appendtag*, each linked page is
+        fetched with ``index_to_soup`` and its ``.main`` element (minus its first
+        h1, first h2 and every script tag) is appended to *appendtag*. The
+        ``#pgbox`` element is removed from *appendtag* along the way. The
+        *soup* argument is not used. Returns None.
+        """
+        seen = set()
+        nexturl = appendtag.find('a', attrs={'class': 'cpn'})
+        while nexturl:
+            href = nexturl['href']
+            if href in seen:
+                # The same link came back; following it again would never end.
+                break
+            seen.add(href)
+            soup2 = self.index_to_soup('http://www.gram.pl' + href)
+            # NOTE(review): presumably #pgbox is the pager holding the a.cpn
+            # link, so it is dropped before the next lookup - confirm on site.
+            r = appendtag.find(id='pgbox')
+            if r:
+                r.extract()
+            pagetext = soup2.find(attrs={'class': 'main'})
+            if pagetext is None:
+                # No '.main' element on the fetched page: nothing to append.
+                break
+            for name in ('h1', 'h2'):
+                r = pagetext.find(name)
+                if r:
+                    r.extract()
+            for r in pagetext.findAll('script'):
+                r.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+            nexturl = appendtag.find('a', attrs={'class': 'cpn'})
+        r = appendtag.find(id='pgbox')
+        if r:
+            r.extract()
+
+    def preprocess_html(self, soup):
+        """Pull linked pages into ``soup.body`` and float each ``div.picbox`` left.
+
+        The soup is modified in place and returned.
+        """
+        self.append_page(soup, soup.body)
+        for box in soup.findAll(name='div', attrs={'class': 'picbox'}):
+            box['style'] = 'float: left;'
+        return soup
\ No newline at end of file
diff --git a/recipes/naczytniki.recipe b/recipes/naczytniki.recipe
index 2ae6bc391e..3d1a8b6095 100644
--- a/recipes/naczytniki.recipe
+++ b/recipes/naczytniki.recipe
@@ -7,12 +7,12 @@ class naczytniki(BasicNewsRecipe):
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
language = 'pl'
description ='everything about e-readers'
- category='readers'
+ category='e-readers'
no_stylesheets=True
+ use_embedded_content=False
oldest_article = 7
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'
Zobacz także: