mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update various Polish news sources
This commit is contained in:
commit
6ce4e61d4f
@ -10,15 +10,15 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
oldest_article = 20
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
|
||||
index='http://www.adventure-zone.info/fusion/'
|
||||
index = 'http://www.adventure-zone.info/fusion/'
|
||||
use_embedded_content = False
|
||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'</?table.*?>'), lambda match: ''),
|
||||
(re.compile(r'</?tbody.*?>'), lambda match: '')]
|
||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
|
||||
remove_tags_after= dict(id='comments')
|
||||
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
|
||||
remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
|
||||
remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
|
||||
remove_tags_after = dict(id='comments')
|
||||
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
|
||||
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
|
||||
|
||||
'''def get_cover_url(self):
|
||||
@ -26,7 +26,7 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
cover=soup.find(id='box_OstatninumerAZ')
|
||||
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
|
||||
return getattr(self, 'cover_url', self.cover_url)'''
|
||||
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
result = re.search('(.+) - Adventure Zone', soup.title.string)
|
||||
if result:
|
||||
@ -66,5 +66,4 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
|
@ -18,3 +18,10 @@ class Astroflesz(BasicNewsRecipe):
|
||||
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
|
||||
remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
|
||||
feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
t = soup.find(attrs={'class':'itemIntroText'})
|
||||
if t:
|
||||
for i in t.findAll('img'):
|
||||
i['style'] = 'float: left; margin-right: 5px;'
|
||||
return soup
|
||||
|
@ -11,7 +11,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
|
||||
masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
|
||||
cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
|
||||
max_articles_per_feed = 100
|
||||
oldest_article = 140000
|
||||
extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
|
||||
oldest_article = 12
|
||||
preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
|
@ -11,6 +11,7 @@ class CoNowegoPl(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
INDEX = 'http://www.conowego.pl/'
|
||||
extra_css = '.news-single-img {float:left; margin-right:5px;}'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
use_embedded_content = False
|
||||
@ -35,7 +36,7 @@ class CoNowegoPl(BasicNewsRecipe):
|
||||
pagetext = soup2.find(attrs={'class':'ni_content'})
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
|
||||
|
||||
comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
|
||||
for comment in comments:
|
||||
comment.extract()
|
||||
|
@ -12,11 +12,13 @@ class CzasGentlemanow(BasicNewsRecipe):
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
extra_css = '.gallery-item {float:left; margin-right: 10px; max-width: 20%;} .alignright {text-align: right; float:right; margin-left:5px;}\
|
||||
.wp-caption-text {text-align: left;} img.aligncenter {display: block; margin-left: auto; margin-right: auto;} .alignleft {float: left; margin-right:5px;}'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
preprocess_regexps = [(re.compile(u'<h3>Może Cię też zainteresować:</h3>'), lambda m: '')]
|
||||
use_embedded_content = False
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'content'})]
|
||||
remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])]
|
||||
remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails', 'respond'])]
|
||||
remove_tags_after = dict(id='comments')
|
||||
feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')]
|
||||
|
@ -16,6 +16,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
extra_css = '.title {font-size:22px;}'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
remove_attrs = ['style', 'width', 'height']
|
||||
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
|
||||
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
|
||||
remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
|
||||
@ -28,4 +29,11 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
for r in soup.findAll('iframe'):
|
||||
r.parent.extract()
|
||||
return soup
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
for r in soup.findAll('span', text=''):
|
||||
if not r.string:
|
||||
r.extract()
|
||||
return soup
|
@ -9,6 +9,7 @@ class Dzieje(BasicNewsRecipe):
|
||||
category = 'history'
|
||||
language = 'pl'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
extra_css = '.imagecache-default {float:left; margin-right:20px;}'
|
||||
index = 'http://dzieje.pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
|
@ -9,7 +9,7 @@ class EkologiaPl(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
extra_css = '.title {font-size: 200%;}'
|
||||
extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
@ -7,6 +7,7 @@ class FilmOrgPl(BasicNewsRecipe):
|
||||
description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
|
||||
category = 'film'
|
||||
language = 'pl'
|
||||
extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}'
|
||||
cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
oldest_article = 7
|
||||
|
@ -10,7 +10,6 @@ class FilmWebPl(BasicNewsRecipe):
|
||||
category = 'movies'
|
||||
language = 'pl'
|
||||
index = 'http://www.filmweb.pl'
|
||||
#extra_css = '.MarkupPhotoHTML-7 {float:left; margin-right: 10px;}'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -19,9 +18,9 @@ class FilmWebPl(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), (re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
|
||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||
remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
|
||||
#remove_tags = [dict()]
|
||||
remove_attributes = ['style',]
|
||||
keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||
keep_only_tags = [dict(attrs={'class':['hdr hdr-super', 'newsContent']})]
|
||||
feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||
(u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
|
||||
@ -44,12 +43,12 @@ class FilmWebPl(BasicNewsRecipe):
|
||||
skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'})
|
||||
if skip_tag is not None:
|
||||
return self.index_to_soup(skip_tag['href'], raw=True)
|
||||
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
for r in soup.findAll(attrs={'class':'singlephoto'}):
|
||||
r['style'] = 'float:left; margin-right: 10px;'
|
||||
return soup
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
@ -59,11 +58,6 @@ class FilmWebPl(BasicNewsRecipe):
|
||||
for i in soup.findAll('sup'):
|
||||
if not i.string or i.string.startswith('(kliknij'):
|
||||
i.extract()
|
||||
tag = soup.find(name='ul', attrs={'class':'inline sep-line'})
|
||||
if tag:
|
||||
tag.name = 'div'
|
||||
for t in tag.findAll('li'):
|
||||
t.name = 'div'
|
||||
for r in soup.findAll(id=re.compile('photo-\d+')):
|
||||
r.extract()
|
||||
for r in soup.findAll(style=re.compile('float: ?left')):
|
||||
|
@ -9,8 +9,9 @@ class Niebezpiecznik_pl(BasicNewsRecipe):
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
|
||||
remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})]
|
||||
feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
|
||||
('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
|
||||
('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
|
||||
|
@ -8,23 +8,24 @@ class WirtualneMedia(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
__author__ = 'fenuks'
|
||||
extra_css = '.thumbnail {float:left; max-width:150px; margin-right:5px;}'
|
||||
description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
|
||||
category = 'internet'
|
||||
language = 'pl'
|
||||
masthead_url= 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
|
||||
cover_url= 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'
|
||||
remove_tags=[dict(id=['header', 'footer'])]
|
||||
feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
|
||||
(u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
|
||||
(u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
|
||||
(u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
|
||||
(u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'),
|
||||
(u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'),
|
||||
(u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
|
||||
(u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
|
||||
(u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
|
||||
(u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')
|
||||
]
|
||||
feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
|
||||
(u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
|
||||
(u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
|
||||
(u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
|
||||
(u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'),
|
||||
(u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'),
|
||||
(u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
|
||||
(u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
|
||||
(u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
|
||||
(u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('artykul', 'print')
|
||||
return url.replace('artykul', 'print')
|
||||
|
@ -1,5 +1,6 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ZTS(BasicNewsRecipe):
|
||||
title = u'Zaufana Trzecia Strona'
|
||||
__author__ = 'fenuks'
|
||||
@ -7,6 +8,7 @@ class ZTS(BasicNewsRecipe):
|
||||
category = 'IT, security'
|
||||
language = 'pl'
|
||||
cover_url = 'http://www.zaufanatrzeciastrona.pl/wp-content/uploads/2012/08/z3s_h100.png'
|
||||
extra_css = '.thumbnail {float: left; margin-right:5px;}'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
Loading…
x
Reference in New Issue
Block a user