From cbf7ad4f735f5fb64f2ea5dd365c9e0e4637be04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 19 Oct 2015 12:24:55 +0200 Subject: [PATCH 1/4] fix interia_fakty --- recipes/interia_fakty.recipe | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/interia_fakty.recipe b/recipes/interia_fakty.recipe index 194048beb0..f45dc81546 100644 --- a/recipes/interia_fakty.recipe +++ b/recipes/interia_fakty.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python2 __license__ = 'GPL v3' -__copyright__ = u'2010-2013, Tomasz Dlugosz ' +__copyright__ = u'2010-2015, Tomasz Dlugosz ' ''' fakty.interia.pl ''' @@ -29,7 +29,7 @@ class InteriaFakty(BasicNewsRecipe): keep_only_tags = [ dict(name='h1'), - dict(name='div', attrs={'class': ['lead textContent', 'text textContent', 'source']})] + dict(name='div', attrs={'class': ['lead textContent fontSize-medium', 'text textContent fontSize-medium', 'source']})] remove_tags = [dict(name='div', attrs={'class':['embed embedAd', 'REMOVE', 'boxHeader']})] From d7adc0758e1751a4687e5a8e8f6173b7af22d7ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 19 Oct 2015 23:11:39 +0200 Subject: [PATCH 2/4] articles and feeds without date - now way to figure out when content emerged --- recipes/icons/mojegotowanie.png | Bin 215 -> 0 bytes recipes/mojegotowanie.recipe | 50 -------------------------------- 2 files changed, 50 deletions(-) delete mode 100644 recipes/icons/mojegotowanie.png delete mode 100644 recipes/mojegotowanie.recipe diff --git a/recipes/icons/mojegotowanie.png b/recipes/icons/mojegotowanie.png deleted file mode 100644 index ee47f4b1737c3fcad5ae4f9bfc4ef051316c7704..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 215 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61SBU+%rFB|^E_P~Ln>}1SIj-=Ep%wzLwi1k zs|+c%GhXngFnB6RuX}j>q-g%9Bm3m%{8fvW=?E~Xxp{#BD3!wNaCQA=miy(Fhu6Ah zG(>!Q|2usL8^`0^l|mhjD<7WT6*WP+Pv7}^z@mm&<~d7x*-TniIZia;&F4z3yKn8g zrtQ1#H3eJNAeW4W#`V&(>+Sb>G_j>@7ktWY@oQVTRzS<0Bkn>>49;S&Ch>mD{seR> NgQu&X%Q~loCIBsnQnCO5 diff --git a/recipes/mojegotowanie.recipe b/recipes/mojegotowanie.recipe deleted file mode 100644 index 7981eb0042..0000000000 --- a/recipes/mojegotowanie.recipe +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = 'MrStefan, teepel' - -''' -www.mojegotowanie.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class mojegotowanie(BasicNewsRecipe): - title = u'Moje Gotowanie' - __author__ = 'MrStefan , teepel ' - language = 'pl' - description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.' - masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif' - cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif' - remove_empty_feeds= True - oldest_article = 7 - max_articles_per_feed = 100 - remove_javascript=True - no_stylesheets=True - - keep_only_tags =[] - keep_only_tags.append(dict(name='div', attrs={'class' : 'content'})) - - feeds = [(u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'), - (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy')] - - def parse_feeds(self): - feeds = BasicNewsRecipe.parse_feeds(self) - for feed in feeds: - for article in feed.articles[:]: - if 'film' in article.title: - feed.articles.remove(article) - return feeds - - def get_article_url(self, article): - link = article.get('link') - if 'Clayout0Cset0Cprint0' in link: - return link - - def print_version(self, url): - segment = url.split('/') - URLPart = segment[-2] - URLPart = URLPart.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/') - URLPart = URLPart.replace('0I', '_') - URLPart = URLPart.replace('0C', '/') - return 'http://www.mojegotowanie.pl/layout/set/print' + URLPart From 3a942a43fe81da4d27d14c2d8983d90cd1ffc394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 19 Oct 2015 23:18:20 +0200 Subject: [PATCH 3/4] this site became a wrapper for a facebook page... --- recipes/astronomia_pl.recipe | 18 ------------------ recipes/icons/astronomia_pl.png | Bin 546 -> 0 bytes 2 files changed, 18 deletions(-) delete mode 100644 recipes/astronomia_pl.recipe delete mode 100644 recipes/icons/astronomia_pl.png diff --git a/recipes/astronomia_pl.recipe b/recipes/astronomia_pl.recipe deleted file mode 100644 index aa84860976..0000000000 --- a/recipes/astronomia_pl.recipe +++ /dev/null @@ -1,18 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe -import re -class Astronomia_pl(BasicNewsRecipe): - title = u'Astronomia.pl' - __author__ = 'fenuks' - description = u'Astronomia.pl jest edukacyjnym portalem skierowanym do uczniów, studentów i miłośników astronomii. Przedstawiamy gwiazdy, planety, galaktyki, czarne dziury i wiele innych tajemnic Wszechświata.' - masthead_url = 'http://www.astronomia.pl/grafika/logo.gif' - cover_url = 'http://www.astronomia.pl/grafika/logo.gif' - category = 'astronomy, science' - language = 'pl' - oldest_article = 8 - max_articles_per_feed = 100 - extra_css='#h2 {font-size: 18px;}' - no_stylesheets=True - preprocess_regexps = [(re.compile(ur'Przeczytaj także:.*?', re.DOTALL), lambda match: '') ] - remove_tags_before=dict(name='div', attrs={'id':'a1'}) - keep_only_tags=[dict(name='div', attrs={'id':['a1', 'h2']})] - feeds = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')] diff --git a/recipes/icons/astronomia_pl.png b/recipes/icons/astronomia_pl.png deleted file mode 100644 index 1535ea476157d0ccd236b2978856d4ae13ba75ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 546 zcmV+-0^R+IP)*9w(_>F0ZoL>`MIW z>zejpEti*sU`{k1k8dK8$P0p_(Ws>3+)_oihbk21rCzV!5P~@f2Eiy8hHjxyc-(9@ z3DY#+lAbX}m0|qE%!3T0dkv!yh#RQYYM%*Sc)<%Jrb}iL0vy!oXr`igouE)CB&qZt zhGBdnKK6RO6~-{Wi2mtU3)+Pml_BJa09?;v$1@<^?H3p2nc30U=Sw% z2OB&X4Biue-#x8($2-JgF}vApKDAn{bAG1N>EB#oKA*osu9(l~@95qC;wM|FRKC(g zbi3WEjBVR*_WOOZR4RSvjgW&}QLR?LvBUHRcJvk+gm8my1xMZSX2`|IvH5KAc>I1c knJ9#C178|no8yuD2S<5IoDgSGN&o-=07*qoM6N<$f|nonKL7v# From b4684dca7a91d41dbcfc6477aef9aa14c5d43dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 19 Oct 2015 23:58:53 +0200 Subject: [PATCH 4/4] include title, remove some garbage --- recipes/dobreprogamy.recipe | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index 3b0c1c5f33..197458d1ea 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -3,7 +3,7 @@ import re class Dobreprogramy_pl(BasicNewsRecipe): title = 'Dobreprogramy.pl' - __author__ = 'fenuks' + __author__ = u'fenuks & Tomasz Długosz' __licence__ ='GPL v3' category = 'IT' masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png' @@ -18,9 +18,9 @@ class Dobreprogramy_pl(BasicNewsRecipe): max_articles_per_feed = 100 remove_attrs = ['style', 'width', 'height'] preprocess_regexps = [(re.compile(ur'
Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...
'), lambda match: '') ] - keep_only_tags = [dict(attrs={'class':['entry single']}), dict(id='phContent_divArticle')] - remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix']}), dict(id='komentarze'), dict(name='iframe')] - #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] + keep_only_tags = [dict(name='h1'), dict(attrs={'class':['entry single']}), dict(id='phContent_divArticle')] + remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] + feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] @@ -36,4 +36,8 @@ class Dobreprogramy_pl(BasicNewsRecipe): for r in soup.findAll('span', text=''): if not r.string: r.extract() - return soup \ No newline at end of file + return soup + + extra_css = ''' + h1 { font-size:130% } + '''