This commit is contained in:
Kovid Goyal 2014-02-21 09:11:02 +05:30
parent 15f9e96441
commit d0fcd63485

View File

@ -6,7 +6,7 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
2013-2014, Tomasz Długosz, tomek3d@gmail.com' 2013-2014, Tomasz Długosz, tomek3d@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re, time
from lxml import html from lxml import html
class GN(BasicNewsRecipe): class GN(BasicNewsRecipe):
@ -59,7 +59,7 @@ class GN(BasicNewsRecipe):
while True: while True:
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page)) soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page))
articles = list(self.find_articles(soup)) articles = list(self.find_articles(soup))
not_assigned.extend([ x for x in articles if x.get('url') not in enlisted ]) not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
page+=1 page+=1
pages = soup.find('span', attrs={'class':'pgr_nrs'}) pages = soup.find('span', attrs={'class':'pgr_nrs'})
if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]: if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]:
@ -100,7 +100,7 @@ class GN(BasicNewsRecipe):
for r in soup.findAll(attrs={'class':['di_dr', 'doc_image']}): for r in soup.findAll(attrs={'class':['di_dr', 'doc_image']}):
del r['style'] del r['style']
for r in soup.findAll(attrs={'class':'cm-i-a'}): for r in soup.findAll(attrs={'class':'cm-i-a'}):
r.replaceWith( '<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>') r.replaceWith('<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
return soup return soup
keep_only_tags = [ keep_only_tags = [