mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
...
This commit is contained in:
parent
15f9e96441
commit
d0fcd63485
@ -6,7 +6,7 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
|
|||||||
2013-2014, Tomasz Długosz, tomek3d@gmail.com'
|
2013-2014, Tomasz Długosz, tomek3d@gmail.com'
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re, time
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
class GN(BasicNewsRecipe):
|
class GN(BasicNewsRecipe):
|
||||||
@ -59,7 +59,7 @@ class GN(BasicNewsRecipe):
|
|||||||
while True:
|
while True:
|
||||||
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page))
|
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page))
|
||||||
articles = list(self.find_articles(soup))
|
articles = list(self.find_articles(soup))
|
||||||
not_assigned.extend([ x for x in articles if x.get('url') not in enlisted ])
|
not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
|
||||||
page+=1
|
page+=1
|
||||||
pages = soup.find('span', attrs={'class':'pgr_nrs'})
|
pages = soup.find('span', attrs={'class':'pgr_nrs'})
|
||||||
if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]:
|
if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]:
|
||||||
@ -100,7 +100,7 @@ class GN(BasicNewsRecipe):
|
|||||||
for r in soup.findAll(attrs={'class':['di_dr', 'doc_image']}):
|
for r in soup.findAll(attrs={'class':['di_dr', 'doc_image']}):
|
||||||
del r['style']
|
del r['style']
|
||||||
for r in soup.findAll(attrs={'class':'cm-i-a'}):
|
for r in soup.findAll(attrs={'class':'cm-i-a'}):
|
||||||
r.replaceWith( '<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
|
r.replaceWith('<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user