From 58f26a724320d23ef7ad21874d095db62e9970ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 2 Mar 2013 22:50:37 +0100 Subject: [PATCH 1/3] update fronda recipe --- recipes/fronda.recipe | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe index d0177b998e..6ed5d052a3 100644 --- a/recipes/fronda.recipe +++ b/recipes/fronda.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = u'2010-2012, Tomasz Dlugosz ' +__copyright__ = u'2010-2013, Tomasz Dlugosz ' ''' fronda.pl ''' @@ -68,7 +68,8 @@ class Fronda(BasicNewsRecipe): article_url = 'http://www.fronda.pl' + article_a['href'] article_title = self.tag_to_string(article_a) articles[genName].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date }) - feeds.append((genName, articles[genName])) + if articles[genName]: + feeds.append((genName, articles[genName])) return feeds keep_only_tags = [ @@ -82,8 +83,10 @@ class Fronda(BasicNewsRecipe): dict(name='h3', attrs={'class':'block-header article comments'}), dict(name='ul', attrs={'class':'comment-list'}), dict(name='ul', attrs={'class':'category'}), + dict(name='ul', attrs={'class':'tag-list'}), dict(name='p', attrs={'id':'comments-disclaimer'}), dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}), - dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}), + dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}), + dict(name='div', attrs={'class':'related-articles content'}), dict(name='div', attrs={'id':'comment-form'}) ] From 3decd7262f366a1a35bba9bc164e201233181209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 2 Mar 2013 22:52:45 +0100 Subject: [PATCH 2/3] update .bzrignore --- .bzrignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bzrignore b/.bzrignore index f14ff947f6..6197e46ef1 100644 --- a/.bzrignore +++ b/.bzrignore @@ -37,7 +37,7 @@ nbproject/ calibre_plugins/ recipes/.git recipes/.gitignore -recipes/README +recipes/README.md recipes/katalog_egazeciarz.recipe recipes/tv_axnscifi.recipe recipes/tv_comedycentral.recipe From 5aafc45c08ce61fbeb20ffb464c7a8f12c704c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 3 Mar 2013 01:29:24 +0100 Subject: [PATCH 3/3] some improvements in gosc_niedzielny.recipe --- recipes/gosc_niedzielny.recipe | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe index 59c8fc2f26..11beb076f5 100644 --- a/recipes/gosc_niedzielny.recipe +++ b/recipes/gosc_niedzielny.recipe @@ -2,7 +2,8 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com' +__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \ + 2013, Tomasz Długosz, tomek3d@gmail.com' from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -12,9 +13,9 @@ import re class GN(BasicNewsRecipe): EDITION = 0 - __author__ = 'Piotr Kontek' - title = u'Gość niedzielny' - description = 'Weekly magazine' + __author__ = 'Piotr Kontek, Tomasz Długosz' + title = u'Gość Niedzielny' + description = 'Ogólnopolski tygodnik katolicki' encoding = 'utf-8' no_stylesheets = True language = 'pl' @@ -38,17 +39,25 @@ class GN(BasicNewsRecipe): first = True for p in main_section.findAll('p', attrs={'class':None}, recursive=False): if first and p.find('img') != None: - article = article + '

' - article = article + str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/') - article = article + '' + article += '

' + article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/') + article += '' for s in p.findAll('span'): - article = article + self.tag_to_string(s) - article = article + '

' + article += self.tag_to_string(s) + article += '

' else: - article = article + str(p).replace('src="/files/','src="http://www.gosc.pl/files/') + article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/') first = False + limiter = main_section.find('p', attrs={'class' : 'limiter'}) + if limiter: + article += str(limiter) - html = unicode(title) + unicode(authors) + unicode(article) + html = unicode(title) + #sometimes authors are not filled in: + if authors: + html += unicode(authors) + unicode(article) + else: + html += unicode(article) self.temp_files.append(PersistentTemporaryFile('_temparse.html')) self.temp_files[-1].write(html) @@ -65,7 +74,8 @@ class GN(BasicNewsRecipe): if img != None: a = img.parent self.EDITION = a['href'] - self.title = img['alt'] + #this was preventing kindles from moving old issues to 'Back Issues' category: + #self.title = img['alt'] self.cover_url = 'http://www.gosc.pl' + img['src'] if year != date.today().year or not first: break