Updated various Polish recipes

This commit is contained in:
Kovid Goyal 2012-04-18 09:33:44 +05:30
parent 18cd783913
commit 015d45a06b
19 changed files with 188 additions and 18 deletions

View File

@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
oldest_article = 20 oldest_article = 20
max_articles_per_feed = 100 max_articles_per_feed = 100
index='http://www.adventure-zone.info/fusion/'
use_embedded_content=False use_embedded_content=False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')] preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'}) remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
skip_tag = skip_tag.findAll(name='a') skip_tag = skip_tag.findAll(name='a')
for r in skip_tag: for r in skip_tag:
if r.strong: if r.strong:
word=r.strong.string word=r.strong.string.lower()
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)): if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True) return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
def preprocess_html(self, soup):
footer=soup.find(attrs={'class':'news-footer middle-border'})
if footer and len(footer('a'))>=2:
footer('a')[1].extract()
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
self.image_article(soup, soup.body) self.image_article(soup, soup.body)
else: else:
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.INDEX + a['href']
return soup return soup

View File

@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
description = 'cdaction.pl - polish games magazine site' description = 'cdaction.pl - polish games magazine site'
category = 'games' category = 'games'
language = 'pl' language = 'pl'
index='http://www.cdaction.pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True no_stylesheets= True
@ -17,4 +18,10 @@ class CD_Action(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/') soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href'] self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
return getattr(self, 'cover_url', self.cover_url) return getattr(self, 'cover_url', self.cover_url)
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png' cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
description = u'Aktualności i blogi z dobreprogramy.pl' description = u'Aktualności i blogi z dobreprogramy.pl'
encoding = 'utf-8' encoding = 'utf-8'
index='http://www.dobreprogramy.pl/'
no_stylesheets = True no_stylesheets = True
language = 'pl' language = 'pl'
extra_css = '.title {font-size:22px;}' extra_css = '.title {font-size:22px;}'
@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png' cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
category = 'history' category = 'history'
language = 'pl' language = 'pl'
index='http://dzieje.pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript=True
@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
remove_tags_after= dict(id='dogory') remove_tags_after= dict(id='dogory')
remove_tags=[dict(id='dogory')] remove_tags=[dict(id='dogory')]
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')] feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'), (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml') (u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
] ]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce' description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music' category = 'music'
language = 'pl' language = 'pl'
index='http://www.emuzyka.pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg' cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True no_stylesheets = True
oldest_article = 7 oldest_article = 7
@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})] keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})] remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')] feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe):
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png' cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
category = 'movies' category = 'movies'
language = 'pl' language = 'pl'
index='http://www.filmweb.pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True no_stylesheets= True
@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
self.log.warn(skip_tag) self.log.warn(skip_tag)
return self.index_to_soup(skip_tag['href'], raw=True) return self.index_to_soup(skip_tag['href'], raw=True)
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -6,16 +6,24 @@ class Gameplay_pl(BasicNewsRecipe):
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.' description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
category = 'games, movies, books, music' category = 'games, movies, books, music'
language = 'pl' language = 'pl'
index='http://gameplay.pl'
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png' masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png' cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript= True
no_stylesheets= True no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})] keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})] remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')] feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):
if 'http' not in url: if 'http' not in url:
return 'http://gameplay.pl'+ url[2:] return 'http://gameplay.pl'+ url[2:]
else: else:
return url return url
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and '../' in a['href']:
a['href']=self.index + a['href'][2:]
return soup

View File

@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_empty_feeds=True
no_stylesheets=True no_stylesheets=True
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})] remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
keep_only_tags=dict(name='div', attrs={'class':'widetext'}) keep_only_tags=dict(name='div', attrs={'class':'widetext'})
@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
self.log.warn('odnosnik') self.log.warn('odnosnik')
self.log.warn(link['href']) self.log.warn(link['href'])
return self.index_to_soup(link['href'], raw=True) return self.index_to_soup(link['href'], raw=True)
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
if '/gry/' in a['href']:
a['href']='http://www.gry.gildia.pl' + a['href']
elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
a['href']='http://www.literatura.gildia.pl' + a['href']
elif u'komiks' in soup.title.string.lower():
a['href']='http://www.literatura.gildia.pl' + a['href']
else:
a['href']='http://www.gildia.pl' + a['href']
return soup

View File

@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
category = 'games' category = 'games'
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8
index='http://www.gram.pl'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
tag=soup.findAll(name='div', attrs={'class':'picbox'}) tag=soup.findAll(name='div', attrs={'class':'picbox'})
for t in tag: for t in tag:
t['style']='float: left;' t['style']='float: left;'
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup return soup

View File

@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
description = u'Serwis Informacyjny - Aktualnosci, recenzje' description = u'Serwis Informacyjny - Aktualnosci, recenzje'
category = 'IT' category = 'IT'
language = 'pl' language = 'pl'
index='http://www.in4.pl/'
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg' #cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
no_stylesheets = True no_stylesheets = True
remove_empty_feeds = True remove_empty_feeds = True
@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup return soup

View File

@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.' description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg' cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
category = 'UFO' category = 'UFO'
index='http://infra.org.pl'
language = 'pl' language = 'pl'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheers=True no_stylesheers=True
@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
remove_tags_after=dict(attrs={'class':'pagenav'}) remove_tags_after=dict(attrs={'class':'pagenav'})
remove_tags=[dict(attrs={'class':'pagenav'})] remove_tags=[dict(attrs={'class':'pagenav'})]
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')] feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -10,6 +10,23 @@ class Konflikty(BasicNewsRecipe):
category='military, history' category='military, history'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
auto_cleanup = True no_stylesheets = True
keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')] feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
(u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for image in soup.findAll(name='a', attrs={'class':'image'}):
if image.img and image.img.has_key('alt'):
image.name='div'
pos = len(image.contents)
image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
return soup

View File

@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class recipeMagic(BasicNewsRecipe): class recipeMagic(BasicNewsRecipe):
title = 'National Geographic PL' title = 'National Geographic PL'
__author__ = 'Marcin Urban 2011' __author__ = 'Marcin Urban 2011'
__modified_by__ = 'fenuks'
description = 'legenda wśród magazynów z historią sięgającą 120 lat' description = 'legenda wśród magazynów z historią sięgającą 120 lat'
cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg' #cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
] ]
remove_attributes = ['width','height'] remove_attributes = ['width','height']
feeds=[]
feeds = [ def find_articles(self, url):
('National Geographic PL', 'http://www.national-geographic.pl/rss/'), articles = []
] soup=self.index_to_soup(url)
tag=soup.find(attrs={'class':'arl'})
art=tag.ul.findAll('li')
for i in art:
title=i.a['title']
url=i.a['href']
#date=soup.find(id='footer').ul.li.string[41:-1]
desc=i.div.p.string
articles.append({'title' : title,
'url' : url,
'date' : '',
'description' : desc
})
return articles
def parse_index(self):
feeds = []
feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))
return feeds
def print_version(self, url): def print_version(self, url):
return url.replace('artykuly0Cpokaz', 'drukuj-artykul') if 'artykuly' in url:
return url.replace('artykuly/pokaz', 'drukuj-artykul')
elif 'aktualnosci' in url:
return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
else:
return url
def get_cover_url(self):
soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
tag=soup.find(attrs={'class':'txt jus'})
self.cover_url=tag.img['src']
return getattr(self, 'cover_url', self.cover_url)

View File

@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
title=soup.find(attrs={'class':'tytul'}) title=soup.find(attrs={'class':'tytul'})
if title: if title:
title['style']='font-size: 20px; font-weight: bold;' title['style']='font-size: 20px; font-weight: bold;'
self.log.warn(soup) for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.INDEX + a['href']
return soup return soup

View File

@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe):
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.' description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
category = 'IT' category = 'IT'
language = 'pl' language = 'pl'
index='http://pcarena.pl'
masthead_url='http://pcarena.pl/pcarena/img/logo.png' masthead_url='http://pcarena.pl/pcarena/img/logo.png'
cover_url= 'http://pcarena.pl/pcarena/img/logo.png' cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
no_stylesheets = True no_stylesheets = True
@ -22,4 +23,10 @@ class PC_Arena(BasicNewsRecipe):
if 'http' not in url: if 'http' not in url:
return 'http://pcarena.pl' + url return 'http://pcarena.pl' + url
else: else:
return url return url
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup

View File

@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
if 'tanuki-anime' in soup.title.string.lower():
a['href']='http://anime.tanuki.pl' + a['href']
elif 'tanuki-manga' in soup.title.string.lower():
a['href']='http://manga.tanuki.pl' + a['href']
elif 'tanuki-czytelnia' in soup.title.string.lower():
a['href']='http://czytelnia.tanuki.pl' + a['href']
return soup return soup

View File

@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
cover_url='http://webhosting.pl/images/logo.png' cover_url='http://webhosting.pl/images/logo.png'
masthead_url='http://webhosting.pl/images/logo.png' masthead_url='http://webhosting.pl/images/logo.png'
oldest_article = 7 oldest_article = 7
index='http://webhosting.pl'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_empty_feeds = True remove_empty_feeds = True
@ -36,4 +37,10 @@ class webhosting_pl(BasicNewsRecipe):
(u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')] (u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]
def print_version(self, url): def print_version(self, url):
return url.replace('webhosting.pl', 'webhosting.pl/print') return url.replace('webhosting.pl', 'webhosting.pl/print')
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup