mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
...
This commit is contained in:
parent
15f9e96441
commit
d0fcd63485
@ -6,7 +6,7 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
|
||||
2013-2014, Tomasz Długosz, tomek3d@gmail.com'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
import re, time
|
||||
from lxml import html
|
||||
|
||||
class GN(BasicNewsRecipe):
|
||||
@ -59,7 +59,7 @@ class GN(BasicNewsRecipe):
|
||||
while True:
|
||||
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page))
|
||||
articles = list(self.find_articles(soup))
|
||||
not_assigned.extend([ x for x in articles if x.get('url') not in enlisted ])
|
||||
not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
|
||||
page+=1
|
||||
pages = soup.find('span', attrs={'class':'pgr_nrs'})
|
||||
if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]:
|
||||
@ -100,7 +100,7 @@ class GN(BasicNewsRecipe):
|
||||
for r in soup.findAll(attrs={'class':['di_dr', 'doc_image']}):
|
||||
del r['style']
|
||||
for r in soup.findAll(attrs={'class':'cm-i-a'}):
|
||||
r.replaceWith( '<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
|
||||
r.replaceWith('<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
|
||||
return soup
|
||||
|
||||
keep_only_tags = [
|
||||
|
Loading…
x
Reference in New Issue
Block a user