mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Updated various Polish recipes
This commit is contained in:
parent
cff95f4ed0
commit
5da024c674
@ -9,11 +9,12 @@ class Adventure_zone(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
oldest_article = 20
|
oldest_article = 20
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
|
||||||
index='http://www.adventure-zone.info/fusion/'
|
index='http://www.adventure-zone.info/fusion/'
|
||||||
use_embedded_content=False
|
use_embedded_content=False
|
||||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
|
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
|
||||||
(re.compile(r'\<table .*?\>'), lambda match: ''),
|
(re.compile(r'</?table.*?>'), lambda match: ''),
|
||||||
(re.compile(r'\<tbody\>'), lambda match: '')]
|
(re.compile(r'</?tbody.*?>'), lambda match: '')]
|
||||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||||
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
|
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
|
||||||
remove_tags_after= dict(id='comments')
|
remove_tags_after= dict(id='comments')
|
||||||
@ -36,11 +37,11 @@ class Adventure_zone(BasicNewsRecipe):
|
|||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
'''def get_cover_url(self):
|
||||||
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
|
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
|
||||||
cover=soup.find(id='box_OstatninumerAZ')
|
cover=soup.find(id='box_OstatninumerAZ')
|
||||||
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
|
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
|
||||||
return getattr(self, 'cover_url', self.cover_url)
|
return getattr(self, 'cover_url', self.cover_url)'''
|
||||||
|
|
||||||
|
|
||||||
def skip_ad_pages(self, soup):
|
def skip_ad_pages(self, soup):
|
||||||
|
@ -3,11 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Android_com_pl(BasicNewsRecipe):
|
class Android_com_pl(BasicNewsRecipe):
|
||||||
title = u'Android.com.pl'
|
title = u'Android.com.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = 'Android.com.pl - biggest polish Android site'
|
description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.'
|
||||||
category = 'Android, mobile'
|
category = 'Android, mobile'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
use_embedded_content=True
|
use_embedded_content=True
|
||||||
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
cover_url =u'http://android.com.pl/wp-content/themes/android/images/logo.png'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
feeds = [(u'Android', u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss')]
|
feeds = [(u'Android', u'http://android.com.pl/feed/')]
|
||||||
|
@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
|
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
|
||||||
category = 'history'
|
category = 'history'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
index = 'http://dzieje.pl'
|
index = 'http://dzieje.pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -14,11 +15,56 @@ class Dzieje(BasicNewsRecipe):
|
|||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
|
keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
|
||||||
remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
|
remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
|
||||||
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
#feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
||||||
|
|
||||||
|
def append_page(self, soup, appendtag):
    """Append the follow-up pages of a paginated article to ``appendtag``.

    Follows the link inside the ``li.pager-next`` element, starting with the
    one found in ``appendtag`` and continuing with the one found on each
    fetched page. From every fetched page the element with class ``content``
    inside ``#content-area`` is taken, stripped of the image/field groups
    listed below, and inserted at the end of ``appendtag``.

    ``soup`` is accepted for interface compatibility and is not used.
    Nothing is returned; ``appendtag`` is modified in place. When
    ``appendtag`` has no ``pager-next`` element it is left untouched.
    """
    # Blocks removed from every appended page before it is merged in.
    per_page_junk = ['fieldgroup group-groupkul',
                     'fieldgroup group-zdjeciekult',
                     'fieldgroup group-zdjecieciekaw',
                     'fieldgroup group-zdjecieksiazka',
                     'fieldgroup group-zdjeciedu',
                     'field field-type-filefield field-field-zdjecieglownawyd']
    # Blocks removed from the merged article once all pages are appended.
    merged_junk = ['item-list', 'field field-type-computed field-field-tagi']

    next_link = appendtag.find('li', attrs={'class': 'pager-next'})
    if next_link is None:
        return

    fetched = set()
    while next_link is not None:
        url = next_link.a['href']
        if not url.startswith('http'):
            url = self.index + url
        # A pager pointing at a page we already merged would loop forever.
        if url in fetched:
            break
        fetched.add(url)

        page = self.index_to_soup(url)
        content_area = page.find(id='content-area')
        pagetext = None
        if content_area is not None:
            pagetext = content_area.find(attrs={'class': 'content'})
        # Stop paging instead of failing on None when the page has no body.
        if pagetext is None:
            break

        for junk in pagetext.findAll(attrs={'class': per_page_junk}):
            junk.extract()
        appendtag.insert(len(appendtag.contents), pagetext)
        next_link = page.find('li', attrs={'class': 'pager-next'})

    for junk in appendtag.findAll(attrs={'class': merged_junk}):
        junk.extract()
|
||||||
|
|
||||||
|
def find_articles(self, url):
    """Scrape one section listing page and return its articles.

    ``url`` is the address of the listing page. The listing is read from
    the direct ``div`` children of ``#content-area > div > div``; in each
    of them the link inside the ``views-field-title`` element supplies the
    article title and address.

    Returns a list of dicts with the keys ``title``, ``url``, ``date`` and
    ``description``; ``date`` and ``description`` are always empty strings.
    """
    articles = []
    soup = self.index_to_soup(url)
    listing = soup.find(id='content-area').div.div
    for row in listing.findAll('div', recursive=False):
        field = row.find(attrs={'class': 'views-field-title'})
        link = None
        if field is not None and field.span is not None:
            link = field.span.a
        # Rows without a title link cannot be turned into an article;
        # skip them rather than abort the whole section.
        if link is None:
            continue
        article_url = link['href']
        # Only site-relative links need the site root in front of them.
        if not article_url.startswith('http'):
            article_url = self.index + article_url
        articles.append({'title': link.string,
                         'url': article_url,
                         'date': '',
                         'description': ''})
    return articles
|
||||||
|
|
||||||
|
def parse_index(self):
    """Build the recipe index from the section listing pages of dzieje.pl.

    Returns a list of ``(section title, articles)`` tuples, one per row of
    the table below and in the same order. ``articles`` is whatever
    ``self.find_articles`` returns for that section's listing URL.
    """
    sections = (
        (u"Wiadomości", 'http://dzieje.pl/wiadomosci'),
        (u"Kultura i sztuka", 'http://dzieje.pl/kulturaisztuka'),
        (u"Film", 'http://dzieje.pl/kino'),
        # Same address as the literal "rozmaitości" path, with the
        # non-ASCII letter percent-encoded as UTF-8 so the URL is ASCII.
        (u"Rozmaitości historyczne", 'http://dzieje.pl/rozmaito%C5%9Bci'),
        (u"Książka", 'http://dzieje.pl/ksiazka'),
        (u"Wystawa", 'http://dzieje.pl/wystawa'),
        (u"Edukacja", 'http://dzieje.pl/edukacja'),
        (u"Dzieje się", 'http://dzieje.pl/wydarzenia'),
    )
    return [(title, self.find_articles(listing_url))
            for title, listing_url in sections]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for a in soup('a'):
|
for a in soup('a'):
|
||||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
a['href']=self.index + a['href']
|
a['href']=self.index + a['href']
|
||||||
|
self.append_page(soup, soup.body)
|
||||||
return soup
|
return soup
|
@ -17,6 +17,7 @@ class FilmWebPl(BasicNewsRecipe):
|
|||||||
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
|
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
|
||||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
|
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
|
||||||
|
remove_attributes = ['style',]
|
||||||
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||||
feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||||
@ -50,4 +51,9 @@ class FilmWebPl(BasicNewsRecipe):
|
|||||||
for i in soup.findAll('sup'):
|
for i in soup.findAll('sup'):
|
||||||
if not i.string or i.string.startswith('(kliknij'):
|
if not i.string or i.string.startswith('(kliknij'):
|
||||||
i.extract()
|
i.extract()
|
||||||
|
tag = soup.find(name='ul', attrs={'class':'inline sep-line'})
|
||||||
|
if tag:
|
||||||
|
tag.name = 'div'
|
||||||
|
for t in tag.findAll('li'):
|
||||||
|
t.name = 'div'
|
||||||
return soup
|
return soup
|
||||||
|
@ -4,9 +4,10 @@ import re
|
|||||||
class Gildia(BasicNewsRecipe):
|
class Gildia(BasicNewsRecipe):
|
||||||
title = u'Gildia.pl'
|
title = u'Gildia.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = 'Gildia - cultural site'
|
description = u'Fantastyczny Portal Kulturalny - newsy, recenzje, galerie, wywiady. Literatura, film, gry komputerowe i planszowe, komiks, RPG, sklep. Nie lekceważ potęgi wyobraźni!'
|
||||||
cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
|
cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
|
||||||
category = 'culture'
|
category = 'culture'
|
||||||
|
cover_url = 'http://gildia.pl/images/logo-main.png'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -23,10 +24,13 @@ class Gildia(BasicNewsRecipe):
|
|||||||
content = soup.find('div', attrs={'class':'news'})
|
content = soup.find('div', attrs={'class':'news'})
|
||||||
if 'recenzj' in soup.title.string.lower():
|
if 'recenzj' in soup.title.string.lower():
|
||||||
for link in content.findAll(name='a'):
|
for link in content.findAll(name='a'):
|
||||||
if 'recenzj' in link['href']:
|
if 'recenzj' in link['href'] or 'muzyka/plyty' in link['href']:
|
||||||
self.log.warn('odnosnik')
|
|
||||||
self.log.warn(link['href'])
|
|
||||||
return self.index_to_soup(link['href'], raw=True)
|
return self.index_to_soup(link['href'], raw=True)
|
||||||
|
if 'fragmen' in soup.title.string.lower():
|
||||||
|
for link in content.findAll(name='a'):
|
||||||
|
if 'fragment' in link['href']:
|
||||||
|
return self.index_to_soup(link['href'], raw=True)
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for a in soup('a'):
|
for a in soup('a'):
|
||||||
|
@ -1,19 +1,20 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
class Gram_pl(BasicNewsRecipe):
|
class Gram_pl(BasicNewsRecipe):
|
||||||
title = u'Gram.pl'
|
title = u'Gram.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = 'Gram.pl - site about computer games'
|
description = u'Serwis społecznościowy o grach: recenzje, newsy, zapowiedzi, encyklopedia gier, forum. Gry PC, PS3, X360, PS Vita, sprzęt dla graczy.'
|
||||||
category = 'games'
|
category = 'games'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
index='http://www.gram.pl'
|
index='http://www.gram.pl'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
#extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent', 'sharedaddy sd-sharing-enabled']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
keep_only_tags= [dict(id='articleModule')]
|
||||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')]
|
remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter']})]
|
||||||
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||||
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
|
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
|
||||||
(u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
|
(u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
|
||||||
@ -28,35 +29,21 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def append_page(self, soup, appendtag):
|
|
||||||
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
|
||||||
while nexturl:
|
|
||||||
soup2 = self.index_to_soup('http://www.gram.pl'+ nexturl['href'])
|
|
||||||
r=appendtag.find(id='pgbox')
|
|
||||||
if r:
|
|
||||||
r.extract()
|
|
||||||
pagetext = soup2.find(attrs={'class':'main'})
|
|
||||||
r=pagetext.find('h1')
|
|
||||||
if r:
|
|
||||||
r.extract()
|
|
||||||
r=pagetext.find('h2')
|
|
||||||
if r:
|
|
||||||
r.extract()
|
|
||||||
for r in pagetext.findAll('script'):
|
|
||||||
r.extract()
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
|
||||||
r=appendtag.find(id='pgbox')
|
|
||||||
if r:
|
|
||||||
r.extract()
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
self.append_page(soup, soup.body)
|
tag=soup.find(name='div', attrs={'class':'summary'})
|
||||||
tag=soup.findAll(name='div', attrs={'class':'picbox'})
|
if tag:
|
||||||
for t in tag:
|
tag.find(attrs={'class':'pros'}).insert(0, BeautifulSoup('<h2>Plusy:</h2>').h2)
|
||||||
t['style']='float: left;'
|
tag.find(attrs={'class':'cons'}).insert(0, BeautifulSoup('<h2>Minusy:</h2>').h2)
|
||||||
|
tag = soup.find(name='section', attrs={'class':'cenzurka'})
|
||||||
|
if tag:
|
||||||
|
rate = tag.p.img['data-ocena']
|
||||||
|
tag.p.img.extract()
|
||||||
|
tag.p.insert(len(tag.p.contents)-2, BeautifulSoup('<h2>Ocena: {0}</h2>'.format(rate)).h2)
|
||||||
for a in soup('a'):
|
for a in soup('a'):
|
||||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||||
a['href']=self.index + a['href']
|
a['href']=self.index + a['href']
|
||||||
|
tag=soup.find(name='span', attrs={'class':'platforma'})
|
||||||
|
if tag:
|
||||||
|
tag.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Historia_org_pl(BasicNewsRecipe):
|
class Historia_org_pl(BasicNewsRecipe):
|
||||||
title = u'Historia.org.pl'
|
title = u'Historia.org.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'history site'
|
description = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.'
|
||||||
cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
|
cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
|
||||||
category = 'history'
|
category = 'history'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
@ -12,16 +12,15 @@ class Historia_org_pl(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
|
|
||||||
feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=atom'),
|
|
||||||
(u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=atom'),
|
feeds = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
|
||||||
(u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=atom'),
|
(u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
|
||||||
(u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=atom'),
|
(u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
|
||||||
(u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=atom'),
|
(u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
|
||||||
(u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=atom'),
|
(u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
|
||||||
(u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=atom'),
|
(u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
|
||||||
(u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=atom'),
|
|
||||||
(u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=atom')]
|
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
|
@ -9,6 +9,21 @@ class Kosmonauta(BasicNewsRecipe):
|
|||||||
language = 'pl'
|
language = 'pl'
|
||||||
cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
|
cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
INDEX = 'http://www.kosmonauta.net'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
no_stylesheets = True
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]
|
keep_only_tags = [dict(name='div', attrs={'class':'item-page'})]
|
||||||
|
remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']})]
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'cedtag'})
|
||||||
|
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Make relative links absolute by prefixing them with ``self.INDEX``.

    Every ``a`` element whose ``href`` does not start with ``http`` gets
    ``self.INDEX`` prepended to that ``href``. Anchors without an ``href``
    and links that already start with ``http`` are left alone.

    Returns the same ``soup`` object, modified in place.
    """
    for a in soup.findAll(name='a'):
        # ``get`` yields None for anchors that carry no href attribute.
        href = a.get('href')
        if href is not None and not href.startswith('http'):
            a['href'] = self.INDEX + href
    return soup
|
||||||
|
|
@ -1,15 +1,16 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
class Ksiazka_net_pl(BasicNewsRecipe):
|
class Ksiazka_net_pl(BasicNewsRecipe):
|
||||||
title = u'ksiazka.net.pl'
|
title = u'książka.net.pl'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Ksiazka.net.pl - book vortal'
|
description = u'Portal Księgarski - tematyczny serwis o książkach. Wydarzenia z rynku księgarsko-wydawniczego, nowości, zapowiedzi, bestsellery, setki recenzji. Niezbędne informacje dla każdego miłośnika książek, księgarza, bibliotekarza i wydawcy.'
|
||||||
cover_url = 'http://www.ksiazka.net.pl/fileadmin/templates/ksiazka.net.pl/images/1PortalKsiegarski-logo.jpg'
|
cover_url = 'http://www.ksiazka.net.pl/fileadmin/templates/ksiazka.net.pl/images/1PortalKsiegarski-logo.jpg'
|
||||||
category = 'books'
|
category = 'books'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
|
remove_empty_feeds = True
|
||||||
#extra_css = 'img {float: right;}'
|
#extra_css = 'img {float: right;}'
|
||||||
preprocess_regexps = [(re.compile(ur'Podoba mi się, kupuję:'), lambda match: '<br />')]
|
preprocess_regexps = [(re.compile(ur'Podoba mi się, kupuję:'), lambda match: '<br />')]
|
||||||
remove_tags_before= dict(name='div', attrs={'class':'m-body'})
|
remove_tags_before= dict(name='div', attrs={'class':'m-body'})
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class Mlody_technik(BasicNewsRecipe):
|
class Mlody_technik(BasicNewsRecipe):
|
||||||
title = u'Mlody technik'
|
title = u'Młody technik'
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Młody technik'
|
description = u'Młody technik'
|
||||||
category = 'science'
|
category = 'science'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user