mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
some fixes and a new recipe
This commit is contained in:
parent
af8584474b
commit
084b8bd3dd
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
class BadaniaNet(BasicNewsRecipe):
|
class BadaniaNet(BasicNewsRecipe):
|
||||||
title = u'badania.net'
|
title = u'badania.net'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
@ -6,9 +7,11 @@ class BadaniaNet(BasicNewsRecipe):
|
|||||||
category = 'science'
|
category = 'science'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
|
cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
|
||||||
|
extra_css = '.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
preprocess_regexps = [(re.compile(r"<h4>Tekst sponsoruje</h4>", re.IGNORECASE), lambda m: ''),]
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
|
remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Comment
|
||||||
import re
|
import re
|
||||||
class FilmOrgPl(BasicNewsRecipe):
|
class FilmOrgPl(BasicNewsRecipe):
|
||||||
title = u'Film.org.pl'
|
title = u'Film.org.pl'
|
||||||
@ -7,14 +8,47 @@ class FilmOrgPl(BasicNewsRecipe):
|
|||||||
description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
|
description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
|
||||||
category = 'film'
|
category = 'film'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}'
|
extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;} .recenzja-title {font-size: 150%; margin-top: 5px; margin-bottom: 5px;}'
|
||||||
cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
|
cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
use_embedded_content = True
|
use_embedded_content = False
|
||||||
preprocess_regexps = [(re.compile(ur'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'), (re.compile(ur'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: '')]
|
remove_attributes = ['style']
|
||||||
remove_tags = [dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']})]
|
preprocess_regexps = [(re.compile(ur'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'), (re.compile(ur'</?center>', re.IGNORECASE|re.DOTALL), lambda m: ''), (re.compile(ur'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'(<br ?/?>\s*?){2,}', re.IGNORECASE|re.DOTALL), lambda m: '')]
|
||||||
|
keep_only_tags = [dict(name=['h11', 'h16', 'h17']), dict(attrs={'class':'editor'})]
|
||||||
|
remove_tags_after = dict(id='comments')
|
||||||
|
remove_tags = [dict(name=['link', 'meta', 'style']), dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']}), dict(id='comments'), dict(attrs={'style':'border: 0pt none ; margin: 0pt; padding: 0pt;'}), dict(name='p', attrs={'class':'rating'}), dict(attrs={'layout':'button_count'})]
|
||||||
feeds = [(u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'), (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'), (u'Analiza', u'http://film.org.pl/a/analiza/feed/'), (u'Ranking', u'http://film.org.pl/a/ranking/feed/'), (u'Blog', u'http://film.org.pl/kmf/blog/feed/'), (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'), (u'Seriale', u'http://film.org.pl/a/seriale/feed/'), (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'), (u'VHS', u'http://film.org.pl/a/vhs-a/feed/')]
|
feeds = [(u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'), (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'), (u'Analiza', u'http://film.org.pl/a/analiza/feed/'), (u'Ranking', u'http://film.org.pl/a/ranking/feed/'), (u'Blog', u'http://film.org.pl/kmf/blog/feed/'), (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'), (u'Seriale', u'http://film.org.pl/a/seriale/feed/'), (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'), (u'VHS', u'http://film.org.pl/a/vhs-a/feed/')]
|
||||||
|
|
||||||
|
def append_page(self, soup, appendtag):
    """Merge the follow-up pages of a paginated article into ``appendtag``.

    The pager is the ``div`` with class ``pagelink`` on the first page.
    Every link inside it is fetched with ``index_to_soup`` and the element
    with class ``editor`` from that page is appended to ``appendtag``.
    Once all pages are merged, pager blocks, the ``comments`` element and
    the elements matching the style/layout filters below are removed from
    the combined content.

    :param soup: parsed first page of the article.
    :param appendtag: tag receiving the extra pages; modified in place.
    :return: ``None``.
    """
    pager = soup.find('div', attrs={'class': 'pagelink'})
    if not pager:
        # Single-page article: nothing to fetch and nothing to clean up.
        return

    for link in pager.findAll('a'):
        url = link.get('href')
        if not url:
            # Anchor without a target; there is no page to download.
            continue
        page = self.index_to_soup(url)
        pagetext = page.find(attrs={'class': 'editor'})
        if pagetext is None:
            # The linked page has no article body; skip it instead of
            # failing the whole article with an AttributeError.
            continue
        # Drop HTML comment nodes before the content is merged.
        for comment in pagetext.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()
        appendtag.insert(len(appendtag.contents), pagetext)

    # Appended pages are raw downloads, so clutter is stripped here.
    unwanted = (
        {'class': 'pagelink'},
        {'id': 'comments'},
        {'style': 'border: 0pt none ; margin: 0pt; padding: 0pt;'},
        {'layout': 'button_count'},
    )
    for attrs in unwanted:
        for junk in appendtag.findAll(attrs=attrs):
            junk.extract()
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Normalise the article markup before conversion.

    Steps, in order: rename every non-standard ``h11`` tag to ``h1``,
    merge the remaining pages of the article into ``soup.body`` via
    ``append_page``, then remove all ``br`` tags.

    :param soup: parsed article page; modified in place.
    :return: the same ``soup`` object.
    """
    headings = soup.findAll('h11')
    for heading in headings:
        heading.name = 'h1'

    # Pages are merged before the <br> sweep so appended content is
    # cleaned as well.
    self.append_page(soup, soup.body)

    line_breaks = soup.findAll('br')
    for line_break in line_breaks:
        line_break.extract()
    return soup
|
@ -16,7 +16,7 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
#extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
#extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||||
keep_only_tags= [dict(id='articleModule')]
|
keep_only_tags= [dict(id='articleModule')]
|
||||||
remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']})]
|
remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside')]
|
||||||
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||||
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')
|
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')
|
||||||
]
|
]
|
||||||
|
@ -8,20 +8,15 @@ class Historia_org_pl(BasicNewsRecipe):
|
|||||||
category = 'history'
|
category = 'history'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
|
extra_css = 'img {float: left; margin-right: 10px;} .alignleft {float: left; margin-right: 10px;}'
|
||||||
remove_empty_feeds= True
|
remove_empty_feeds= True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
|
feeds = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
|
||||||
(u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
|
(u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
|
||||||
(u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
|
(u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
|
||||||
(u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
|
(u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
|
||||||
(u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
|
(u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
|
||||||
(u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
|
(u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url + '?tmpl=component&print=1&layout=default&page='
|
|
BIN
recipes/icons/sport_pl.png
Normal file
BIN
recipes/icons/sport_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 627 B |
@ -6,15 +6,14 @@ class INFRA(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
|
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
|
||||||
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
|
cover_url = 'http://i.imgur.com/j7hJT.jpg'
|
||||||
category = 'UFO'
|
category = 'UFO'
|
||||||
index='http://infra.org.pl'
|
index='http://infra.org.pl'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheers=True
|
# The recipe option is spelled ``remove_attributes``; ``remove_attrs`` is not
# read by BasicNewsRecipe, so inline styles were never actually stripped.
remove_attributes = ['style']
|
||||||
remove_tags_before=dict(name='h2', attrs={'class':'contentheading'})
|
no_stylesheets = True
|
||||||
remove_tags_after=dict(attrs={'class':'pagenav'})
|
keep_only_tags = [dict(id='ja-current-content')]
|
||||||
remove_tags=[dict(attrs={'class':'pagenav'})]
|
|
||||||
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]
|
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class KDEFamilyPl(BasicNewsRecipe):
|
class KDEFamilyPl(BasicNewsRecipe):
|
||||||
@ -9,6 +10,7 @@ class KDEFamilyPl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg'
|
cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
|
feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
|
@ -25,6 +25,7 @@ class Konflikty(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for image in soup.findAll(name='a', attrs={'class':'image'}):
|
for image in soup.findAll(name='a', attrs={'class':'image'}):
|
||||||
|
image['style'] = 'width: 210px; float: left; margin-right:5px;'
|
||||||
if image.img and image.img.has_key('alt'):
|
if image.img and image.img.has_key('alt'):
|
||||||
image.name='div'
|
image.name='div'
|
||||||
pos = len(image.contents)
|
pos = len(image.contents)
|
||||||
|
@ -8,6 +8,7 @@ class Kosmonauta(BasicNewsRecipe):
|
|||||||
category = 'astronomy'
|
category = 'astronomy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
|
cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
|
||||||
|
extra_css = '.thumbnail {float:left;margin-right:5px;}'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
INDEX = 'http://www.kosmonauta.net'
|
INDEX = 'http://www.kosmonauta.net'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -16,10 +17,13 @@ class Kosmonauta(BasicNewsRecipe):
|
|||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'item-page'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'item-page'})]
|
||||||
remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']})]
|
remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']}), dict(attrs={'alt':['Poprzednia strona', 'Następna strona']})]
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'cedtag'})
|
remove_tags_after = dict(name='div', attrs={'class':'cedtag'})
|
||||||
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
|
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
|
||||||
|
|
||||||
|
def print_version(self, url):
    """Return the print-view address for the article at ``url``.

    The print-view query string is appended to ``url`` unchanged.

    :param url: article address taken from the feed.
    :return: ``url`` followed by the print-view query string.
    """
    print_query = '?tmpl=component&print=1&layout=default&page='
    return url + print_query
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for a in soup.findAll(name='a'):
|
for a in soup.findAll(name='a'):
|
||||||
if a.has_key('href'):
|
if a.has_key('href'):
|
||||||
@ -27,4 +31,3 @@ class Kosmonauta(BasicNewsRecipe):
|
|||||||
if not href.startswith('http'):
|
if not href.startswith('http'):
|
||||||
a['href'] = self.INDEX + href
|
a['href'] = self.INDEX + href
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ class Mlody_technik(BasicNewsRecipe):
|
|||||||
language = 'pl'
|
language = 'pl'
|
||||||
#cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
|
#cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
|
||||||
preprocess_regexps = [(re.compile(r"<h4>Podobne</h4>", re.IGNORECASE), lambda m: '')]
|
preprocess_regexps = [(re.compile(r"<h4>Podobne</h4>", re.IGNORECASE), lambda m: '')]
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -11,6 +11,8 @@ class NaukawPolsce(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
extra_css = '.miniaturka {float: left; margin-right: 5px; max-width: 350px;} .miniaturka-dol-strony {display: inline-block; margin: 0 15px; width: 120px;}'
|
||||||
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
index = 'http://www.naukawpolsce.pl'
|
index = 'http://www.naukawpolsce.pl'
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})]
|
||||||
remove_tags = [dict(name='div', attrs={'class':'tagi'})]
|
remove_tags = [dict(name='div', attrs={'class':'tagi'})]
|
||||||
|
@ -7,6 +7,8 @@ class Niebezpiecznik_pl(BasicNewsRecipe):
|
|||||||
category = 'hacking, IT'
|
category = 'hacking, IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
|
extra_css = '.entry {margin-top: 25px;}'
|
||||||
|
# The recipe option is spelled ``remove_attributes``; ``remove_attrs`` is not
# read by BasicNewsRecipe, so inline styles were never actually stripped.
remove_attributes = ['style']
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
@ -6,6 +6,7 @@ class OSWorld(BasicNewsRecipe):
|
|||||||
category = 'OS, IT, open source, Linux'
|
category = 'OS, IT, open source, Linux'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
|
cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
|
||||||
|
extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
@ -11,6 +11,7 @@ class PC_Centre(BasicNewsRecipe):
|
|||||||
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
#keep_only_tags= [dict(id='content')]
|
#keep_only_tags= [dict(id='content')]
|
||||||
#remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
#remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
||||||
remove_tags=[dict(attrs={'class':'logo_print'})]
|
remove_tags=[dict(attrs={'class':'logo_print'})]
|
||||||
|
72
recipes/sport_pl.recipe
Normal file
72
recipes/sport_pl.recipe
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = 'teepel 2012'
|
||||||
|
|
||||||
|
'''
|
||||||
|
sport.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class sport_pl(BasicNewsRecipe):
    """Recipe that downloads the Sport.pl feeds.

    Article titles are cleaned of the ``[ZDJĘCIA]`` / ``[WIDEO]`` markers
    and every article address is rewritten by ``print_version`` to a
    ``http://www.sport.pl/<section>/2029020,<id>,<id>.html`` address.
    """

    title = 'Sport.pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = u'Największy portal sportowy w Polsce. Wiadomości sportowe z najważniejszych wydarzeń, relacje i wyniki meczów na żywo.'
    masthead_url = 'http://press.gazeta.pl/file/mediakit/154509/c8/sportpl.jpg'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    remove_empty_feeds = True

    # Only the article container is kept.
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]

    # Matches anchors whose href is exactly 'www.gazeta.pl'.
    remove_tags = [dict(name='a', attrs={'href': 'www.gazeta.pl'})]

    feeds = [
        (u'Wszystkie wiadomości', u'http://rss.gazeta.pl/pub/rss/sport.xml'),
        (u'Piłka nożna', u'http://www.sport.pl/pub/rss/sport/pilka_nozna.htm'),
        (u'F1', u'http://www.sport.pl/pub/rss/sportf1.htm'),
        (u'Tenis', u'http://serwisy.gazeta.pl/pub/rss/tenis.htm'),
        (u'Siatkówka', u'http://gazeta.pl.feedsportal.com/c/32739/f/611628/index.rss'),
        (u'Koszykówka', u'http://gazeta.pl.feedsportal.com/c/32739/f/611647/index.rss'),
        (u'Piłka ręczna', u'http://gazeta.pl.feedsportal.com/c/32739/f/611635/index.rss'),
        (u'Inne sporty', u'http://gazeta.pl.feedsportal.com/c/32739/f/611649/index.rss'),
    ]

    def parse_feeds(self):
        """Parse the feeds and strip media markers from article titles.

        :return: the feed list produced by ``BasicNewsRecipe.parse_feeds``,
            with ``[ZDJĘCIA]`` and ``[WIDEO]`` removed from every title.
        """
        # Unicode literals (with the non-ASCII letter escaped) so the
        # comparison with unicode titles never triggers an implicit
        # ASCII decode of a byte string.
        markers = (u'[ZDJ\u0118CIA]', u'[WIDEO]')
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles:
                cleaned = article.title
                # Remove every marker, not just the first kind found.
                for marker in markers:
                    cleaned = cleaned.replace(marker, u'')
                article.title = cleaned
        return feeds

    def print_version(self, url):
        """Map a feed article address to its sport.pl ``2029020`` address.

        Two layouts are understood: feedsportal redirect addresses, whose
        second-to-last path segment carries the escaped sport.pl address,
        and direct addresses of the form ``.../<section>/<n>,<id>,<id>,...``.

        :param url: article address taken from the feed.
        :return: the rewritten address, or ``url`` unchanged when it does
            not match either layout.
        """
        segments = url.split('/')
        try:
            if 'feedsportal' in url:
                encoded = segments[-2]
                # Undo the feedsportal escaping; the order matters because
                # later patterns are substrings of earlier ones.
                substitutions = (
                    ('0L0Ssport0Bpl0C', ''),
                    ('0C10H', '/'),
                    ('0H', ','),
                    ('0I', '_'),
                    ('A', ''),
                )
                for escaped, plain in substitutions:
                    encoded = encoded.replace(escaped, plain)
                path = encoded.split('/')
                section = path[0]
                ids = path[1].split(',')
                article_id = ids[0] + ',' + ids[1]
            else:
                section = segments[-2]
                ids = segments[-1].split(',')
                article_id = ids[1] + ',' + ids[2]
        except IndexError:
            # Unexpected address layout: keep the original address rather
            # than failing the download of this article.
            return url
        return 'http://www.sport.pl/' + section + '/2029020,' + article_id + '.html'
|
File diff suppressed because one or more lines are too long
@ -8,11 +8,13 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
use_embedded_content=True
|
use_embedded_content = False
|
||||||
|
no_stylesheets = True
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||||
|
keep_only_tags = [dict(id='news_block')]
|
||||||
#remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
#remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
||||||
#remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
|
#remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
|
||||||
#remove_tags=[dict(name='footer', attrs={'class':'entry-footer clearfix'}), dict(name='div', attrs={'class':'entry-comment-counter'})]
|
remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments']})]
|
||||||
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
@ -12,6 +12,7 @@ class WirtualneMedia(BasicNewsRecipe):
|
|||||||
description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
|
description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
|
||||||
category = 'internet'
|
category = 'internet'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
masthead_url= 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
|
masthead_url= 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
|
||||||
cover_url= 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'
|
cover_url= 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'
|
||||||
remove_tags=[dict(id=['header', 'footer'])]
|
remove_tags=[dict(id=['header', 'footer'])]
|
||||||
@ -24,8 +25,6 @@ class WirtualneMedia(BasicNewsRecipe):
|
|||||||
(u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
|
(u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
|
||||||
(u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
|
(u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
|
||||||
(u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
|
(u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
|
||||||
(u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')
|
(u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')]
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('artykul', 'print')
|
return url.replace('artykul', 'print')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user