From b59c3778a39128e6ed222225b8659fbaf9ccff86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 21 Feb 2014 20:14:01 +0100 Subject: [PATCH 1/3] save link to last number in a variable instead of finding it out again and again --- recipes/gosc_full.recipe | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/recipes/gosc_full.recipe b/recipes/gosc_full.recipe index 5797c0419f..abb9c9b694 100644 --- a/recipes/gosc_full.recipe +++ b/recipes/gosc_full.recipe @@ -30,7 +30,8 @@ class GN(BasicNewsRecipe): return page[4] def parse_index(self): - soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue()) + self.last_issue = self.find_last_issue() + soup = self.index_to_soup('http://gosc.pl' + self.last_issue) self.cover_url = 'http://www.gosc.pl' + soup.find('div',attrs={'class':'fl-w100 release-wp'}).findAll('a')[-4].contents[0]['src'] feeds = [] enlisted = [] @@ -57,7 +58,7 @@ class GN(BasicNewsRecipe): page = 1 not_assigned = [] while True: - soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue().replace('przeglad','wszystko') + '/' + str(page)) + soup = self.index_to_soup('http://gosc.pl' + self.last_issue.replace('przeglad','wszystko') + '/' + str(page)) articles = list(self.find_articles(soup)) not_assigned.extend([x for x in articles if x.get('url') not in enlisted]) page+=1 From ac176a1e29558580aa462e537adb91790c4b9ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 21 Feb 2014 20:23:38 +0100 Subject: [PATCH 2/3] use mobile page pt.1 --- recipes/gosc_full.recipe | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/gosc_full.recipe b/recipes/gosc_full.recipe index abb9c9b694..f149140eb8 100644 --- a/recipes/gosc_full.recipe +++ b/recipes/gosc_full.recipe @@ -20,9 +20,9 @@ class GN(BasicNewsRecipe): remove_javascript = True def find_last_issue(self): - raw = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True) + raw = self.index_to_soup('http://m.gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True) doc = html.fromstring(raw) - page = doc.xpath('//div[@class="c"]//div[@class="search-result"]/div[1]/div[2]/h1//a/@href') + page = doc.xpath('//div[@class="search-result"]/div[1]/div[1]/a/@href') if time.strftime("%w") in ['3','4']: return page[5] From 09f9d7348f5f8c34eac9bcf192a5ee6cd8ea919c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 23 Feb 2014 02:05:42 +0100 Subject: [PATCH 3/3] add masthead picture and fix feed name --- recipes/gosc_full.recipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recipes/gosc_full.recipe b/recipes/gosc_full.recipe index f149140eb8..300a04cf59 100644 --- a/recipes/gosc_full.recipe +++ b/recipes/gosc_full.recipe @@ -18,6 +18,7 @@ class GN(BasicNewsRecipe): no_stylesheets = True language = 'pl' remove_javascript = True + masthead_url = 'http://m.gosc.pl/static/themes/czerwony_gosc-mobile/logo.png' def find_last_issue(self): raw = self.index_to_soup('http://m.gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True) @@ -43,7 +44,7 @@ class GN(BasicNewsRecipe): 'url' : 'http://www.gosc.pl' + art['href'], 'description' : self.tag_to_string(a.find('p',attrs={'class':'b lead'})) }] - feeds.append((u'Wstępniak',articles)) + feeds.append((u'Na dobry początek',articles)) enlisted.append(articles[0].get('url')) # columns: for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):