mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
2d32342dd2
@ -6,6 +6,7 @@ class Android_com_pl(BasicNewsRecipe):
|
|||||||
description = 'Android.com.pl - biggest polish Android site'
|
description = 'Android.com.pl - biggest polish Android site'
|
||||||
category = 'Android, mobile'
|
category = 'Android, mobile'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
use_embedded_content=True
|
||||||
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class CGM(BasicNewsRecipe):
|
class CGM(BasicNewsRecipe):
|
||||||
title = u'CGM'
|
title = u'CGM'
|
||||||
@ -17,8 +18,8 @@ class CGM(BasicNewsRecipe):
|
|||||||
remove_tags_before=dict(id='mainContent')
|
remove_tags_before=dict(id='mainContent')
|
||||||
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
||||||
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
||||||
dict(id=['movieShare', 'container'])]
|
dict(id=['movieShare', 'container'])]
|
||||||
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
||||||
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
||||||
|
|
||||||
@ -33,6 +34,8 @@ class CGM(BasicNewsRecipe):
|
|||||||
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
||||||
gallery.contents[1].name='img'
|
gallery.contents[1].name='img'
|
||||||
gallery.contents[1]['src']=img
|
gallery.contents[1]['src']=img
|
||||||
|
pos = len(gallery.contents)
|
||||||
|
gallery.insert(pos, BeautifulSoup('<br />'))
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ad=soup.findAll('a')
|
ad=soup.findAll('a')
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class Elektroda(BasicNewsRecipe):
|
class Elektroda(BasicNewsRecipe):
|
||||||
title = u'Elektroda'
|
title = u'Elektroda'
|
||||||
@ -13,3 +14,18 @@ class Elektroda(BasicNewsRecipe):
|
|||||||
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
||||||
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
||||||
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
tag=soup.find('span', attrs={'class':'postbody'})
|
||||||
|
if tag:
|
||||||
|
pos = len(tag.contents)
|
||||||
|
tag.insert(pos, BeautifulSoup('<br />'))
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def parse_feeds (self):
|
||||||
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
for feed in feeds:
|
||||||
|
for article in feed.articles[:]:
|
||||||
|
article.title=article.title[article.title.find("::")+3:]
|
||||||
|
return feeds
|
||||||
|
@ -13,7 +13,7 @@ class Filmweb_pl(BasicNewsRecipe):
|
|||||||
remove_empty_feeds=True
|
remove_empty_feeds=True
|
||||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||||
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
|
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||||
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||||
|
@ -9,12 +9,12 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
extra_css = 'h2 {font-style: italic; font-size:20px;}'
|
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
||||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
||||||
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
|
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||||
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
||||||
|
|
||||||
def parse_feeds (self):
|
def parse_feeds (self):
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
@ -23,3 +23,33 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def append_page(self, soup, appendtag):
|
||||||
|
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||||
|
while nexturl:
|
||||||
|
soup2 = self.index_to_soup('http://www.gram.pl'+ nexturl['href'])
|
||||||
|
r=appendtag.find(id='pgbox')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
pagetext = soup2.find(attrs={'class':'main'})
|
||||||
|
r=pagetext.find('h1')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
r=pagetext.find('h2')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
for r in pagetext.findAll('script'):
|
||||||
|
r.extract()
|
||||||
|
pos = len(appendtag.contents)
|
||||||
|
appendtag.insert(pos, pagetext)
|
||||||
|
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||||
|
r=appendtag.find(id='pgbox')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
self.append_page(soup, soup.body)
|
||||||
|
tag=soup.findAll(name='div', attrs={'class':'picbox'})
|
||||||
|
for t in tag:
|
||||||
|
t['style']='float: left;'
|
||||||
|
return soup
|
@ -7,12 +7,12 @@ class naczytniki(BasicNewsRecipe):
|
|||||||
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
description ='everything about e-readers'
|
description ='everything about e-readers'
|
||||||
category='readers'
|
category='e-readers'
|
||||||
no_stylesheets=True
|
no_stylesheets=True
|
||||||
|
use_embedded_content=False
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
||||||
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
|
|
||||||
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
||||||
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
||||||
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
@ -17,21 +17,8 @@ class Overclock_pl(BasicNewsRecipe):
|
|||||||
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
||||||
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
def append_page(self, soup, appendtag):
|
if 'articles/show' in url:
|
||||||
tag=soup.find(id='navigation')
|
return url.replace('show', 'showall')
|
||||||
if tag:
|
else:
|
||||||
nexturl=tag.findAll('option')
|
return url
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[2:]:
|
|
||||||
soup2 = self.index_to_soup(nextpage['value'])
|
|
||||||
pagetext = soup2.find(id='content')
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
rem=appendtag.find(attrs={'alt':'Pierwsza'})
|
|
||||||
if rem:
|
|
||||||
rem.parent.extract()
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
@ -10,5 +10,7 @@ class palmtop_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
use_embedded_content=True
|
||||||
|
#remove_tags_before=dict(name='h2')
|
||||||
|
#remove_tags_after=dict(attrs={'class':'entry clearfix'})
|
||||||
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
||||||
|
@ -1,31 +1,25 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class PC_Arena(BasicNewsRecipe):
|
class PC_Arena(BasicNewsRecipe):
|
||||||
title = u'PCArena'
|
title = u'PCArena'
|
||||||
oldest_article = 18300
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
masthead_url='http://pcarena.pl/public/design/frontend/images/logo.gif'
|
masthead_url='http://pcarena.pl/pcarena/img/logo.png'
|
||||||
cover_url= 'http://pcarena.pl/public/design/frontend/images/logo.gif'
|
cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
|
remove_empty_feeds=True
|
||||||
remove_tags=[dict(attrs={'class':'pages'})]
|
#keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
|
||||||
feeds = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'), (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]
|
#remove_tags=[dict(attrs={'class':'pages'})]
|
||||||
|
feeds = [(u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'), (u'Testy', u'http://pcarena.pl/testy/feeds.rss'), (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'), (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'), (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]
|
||||||
|
|
||||||
def append_page(self, soup, appendtag):
|
def print_version(self, url):
|
||||||
tag=soup.find(name='div', attrs={'class':'pagNum'})
|
return url.replace('show', 'print')
|
||||||
if tag:
|
|
||||||
nexturl=tag.findAll('a')
|
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[1:]:
|
|
||||||
nextpage= 'http://pcarena.pl' + nextpage['href']
|
|
||||||
soup2 = self.index_to_soup(nextpage)
|
|
||||||
pagetext = soup2.find(attrs={'class':'artBody'})
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def image_url_processor(self, baseurl, url):
|
||||||
self.append_page(soup, soup.body)
|
if 'http' not in url:
|
||||||
return soup
|
return 'http://pcarena.pl' + url
|
||||||
|
else:
|
||||||
|
return url
|
@ -10,32 +10,11 @@ class PC_Centre(BasicNewsRecipe):
|
|||||||
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
|
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags= [dict(id='content')]
|
remove_empty_feeds = True
|
||||||
remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
#keep_only_tags= [dict(id='content')]
|
||||||
feeds = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n§ion=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n§ion=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n§ion=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n§ion=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n§ion=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n§ion=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n§ion=9')]
|
#remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
||||||
|
remove_tags=[dict(attrs={'class':'logo_print'})]
|
||||||
|
feeds = [(u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n§ion=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n§ion=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n§ion=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n§ion=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n§ion=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n§ion=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n§ion=9')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
def append_page(self, soup, appendtag):
|
return url.replace('show', 'print')
|
||||||
tag=soup.find(name='div', attrs={'class':'pages'})
|
|
||||||
if tag:
|
|
||||||
nexturl=tag.findAll('a')
|
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[:-1]:
|
|
||||||
nextpage= 'http://pccentre.pl' + nextpage['href']
|
|
||||||
soup2 = self.index_to_soup(nextpage)
|
|
||||||
pagetext = soup2.find(id='content')
|
|
||||||
rem=pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']})
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
rem=pagetext.findAll(id='comments')
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
rem=pagetext.findAll('h1')
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
@ -8,10 +8,11 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
use_embedded_content=True
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||||
remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
#remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
||||||
remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
|
#remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
|
#remove_tags=[dict(name='footer', attrs={'class':'entry-footer clearfix'}), dict(name='div', attrs={'class':'entry-comment-counter'})]
|
||||||
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||||
title = u'WNP'
|
title = u'WNP'
|
||||||
@ -8,10 +8,11 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
|||||||
description = u'Wirtualny Nowy Przemysł'
|
description = u'Wirtualny Nowy Przemysł'
|
||||||
category = 'economy'
|
category = 'economy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
keep_only_tags = dict(name='div', attrs={'id':'contentText'})
|
remove_tags=[dict(attrs={'class':'printF'})]
|
||||||
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
||||||
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
||||||
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
||||||
@ -19,3 +20,7 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
|||||||
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
||||||
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
||||||
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
||||||
|
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return 'http://wnp.pl/drukuj/' +url[url.find(',')+1:]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import re
|
|
||||||
import urllib
|
import urllib
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user