This commit is contained in:
Kovid Goyal 2014-02-21 09:11:02 +05:30
parent 15f9e96441
commit d0fcd63485

View File

@@ -6,7 +6,7 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
2013-2014, Tomasz Długosz, tomek3d@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
-import re
+import re, time
from lxml import html
class GN(BasicNewsRecipe):
@@ -59,7 +59,7 @@ class GN(BasicNewsRecipe):
while True:
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page))
articles = list(self.find_articles(soup))
-not_assigned.extend([ x for x in articles if x.get('url') not in enlisted ])
+not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
page+=1
pages = soup.find('span', attrs={'class':'pgr_nrs'})
if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]:
@@ -100,7 +100,7 @@ class GN(BasicNewsRecipe):
for r in soup.findAll(attrs={'class':['di_dr', 'doc_image']}):
del r['style']
for r in soup.findAll(attrs={'class':'cm-i-a'}):
-r.replaceWith( '<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
+r.replaceWith('<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
return soup
keep_only_tags = [