mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Merge branch 'master' of https://github.com/t3d/calibre
This commit is contained in:
commit
37fcd4093b
@ -18,11 +18,12 @@ class GN(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
masthead_url = 'http://m.gosc.pl/static/themes/czerwony_gosc-mobile/logo.png'
|
||||||
|
|
||||||
def find_last_issue(self):
|
def find_last_issue(self):
|
||||||
raw = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True)
|
raw = self.index_to_soup('http://m.gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True)
|
||||||
doc = html.fromstring(raw)
|
doc = html.fromstring(raw)
|
||||||
page = doc.xpath('//div[@class="c"]//div[@class="search-result"]/div[1]/div[2]/h1//a/@href')
|
page = doc.xpath('//div[@class="search-result"]/div[1]/div[1]/a/@href')
|
||||||
|
|
||||||
if time.strftime("%w") in ['3','4']:
|
if time.strftime("%w") in ['3','4']:
|
||||||
return page[5]
|
return page[5]
|
||||||
@ -30,7 +31,8 @@ class GN(BasicNewsRecipe):
|
|||||||
return page[4]
|
return page[4]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue())
|
self.last_issue = self.find_last_issue()
|
||||||
|
soup = self.index_to_soup('http://gosc.pl' + self.last_issue)
|
||||||
self.cover_url = 'http://www.gosc.pl' + soup.find('div',attrs={'class':'fl-w100 release-wp'}).findAll('a')[-4].contents[0]['src']
|
self.cover_url = 'http://www.gosc.pl' + soup.find('div',attrs={'class':'fl-w100 release-wp'}).findAll('a')[-4].contents[0]['src']
|
||||||
feeds = []
|
feeds = []
|
||||||
enlisted = []
|
enlisted = []
|
||||||
@ -42,7 +44,7 @@ class GN(BasicNewsRecipe):
|
|||||||
'url' : 'http://www.gosc.pl' + art['href'],
|
'url' : 'http://www.gosc.pl' + art['href'],
|
||||||
'description' : self.tag_to_string(a.find('p',attrs={'class':'b lead'}))
|
'description' : self.tag_to_string(a.find('p',attrs={'class':'b lead'}))
|
||||||
}]
|
}]
|
||||||
feeds.append((u'Wstępniak',articles))
|
feeds.append((u'Na dobry początek',articles))
|
||||||
enlisted.append(articles[0].get('url'))
|
enlisted.append(articles[0].get('url'))
|
||||||
# columns:
|
# columns:
|
||||||
for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
|
for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
|
||||||
@ -57,7 +59,7 @@ class GN(BasicNewsRecipe):
|
|||||||
page = 1
|
page = 1
|
||||||
not_assigned = []
|
not_assigned = []
|
||||||
while True:
|
while True:
|
||||||
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page))
|
soup = self.index_to_soup('http://gosc.pl' + self.last_issue.replace('przeglad','wszystko') + '/' + str(page))
|
||||||
articles = list(self.find_articles(soup))
|
articles = list(self.find_articles(soup))
|
||||||
not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
|
not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
|
||||||
page+=1
|
page+=1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user