diff --git a/recipes/balkanist.recipe b/recipes/balkanist.recipe deleted file mode 100644 index 59a92906cc..0000000000 --- a/recipes/balkanist.recipe +++ /dev/null @@ -1,50 +0,0 @@ -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2017, Darko Miletic ' -''' -http://balkanist.net/magazine -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Pagina12(BasicNewsRecipe): - title = 'Balkanist' - __author__ = 'Darko Miletic' - description = 'Balkanist is an experimental, bilingual platform featuring politics, analysis, culture, and criticism for a smart international audience underwhelmed by what is currently on offer. Our aim is to provide bold, uncompromising coverage of the Balkan region and everything to its East. We are currently entirely independent, self- and reader-funded, and are not affiliated with any organization, company, or government institution.' # noqa - publisher = 'Balkanist' - category = 'news, politics, Balkans' - oldest_article = 30 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - publication_type = 'magazine' - auto_cleanup = True - masthead_url = 'http://media.balkanist.net/2013/07/Balkanist-Magazine-cover.png' - ignore_duplicate_articles = {'url'} - extra_css = """ - body{font-family: Lora,serif} - img{margin-top:1em; margin-bottom: 1em; display:block} - """ - - conversion_options = { - 'comment': description, - 'tags': category, - 'publisher': publisher, - 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link']), - ] - - feeds = [ - (u'Magazine', u'http://balkanist.net/magazine/feed/'), - (u'News', u'http://balkanist.net/news/feed/'), - (u'Commentary', u'http://balkanist.net/commentary/feed/'), - (u'Arts and Culture', u'http://balkanist.net/arts-and-culture/feed/'), - (u'Politics', u'http://balkanist.net/politics/feed/'), - ] diff --git a/recipes/bangkok_biz.recipe b/recipes/bangkok_biz.recipe deleted file mode 100644 index f0ec772e45..0000000000 --- a/recipes/bangkok_biz.recipe +++ /dev/null @@ -1,25 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1290689337(BasicNewsRecipe): - __author__ = 'Anat R.' - language = 'th' - title = u'Bangkok Biz News' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_javascript = True - use_embedded_content = False - feeds = [(u'Headlines', - u'http://www.bangkokbiznews.com/home/services/rss/home.xml'), - (u'Politics', u'http://www.bangkokbiznews.com/home/services/rss/politics.xml'), - (u'Business', u'http://www.bangkokbiznews.com/home/services/rss/business.xml'), - (u'Finance', u' http://www.bangkokbiznews.com/home/services/rss/finance.xml'), - (u'Technology', u' http://www.bangkokbiznews.com/home/services/rss/it.xml')] - remove_tags_before = dict(name='div', attrs={'class': 'box-Detailcontent'}) - remove_tags_after = dict(name='p', attrs={'class': 'allTags'}) - remove_tags = [] - remove_tags.append(dict(name='div', attrs={'id': 'content-tools'})) - remove_tags.append(dict(name='p', attrs={'class': 'allTags'})) - remove_tags.append(dict(name='div', attrs={'id': 'morePic'})) - remove_tags.append(dict(name='ul', attrs={'class': 'tabs-nav'})) diff --git a/recipes/bankier_pl.recipe b/recipes/bankier_pl.recipe deleted file mode 100644 index b7cc2cce8f..0000000000 --- a/recipes/bankier_pl.recipe +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = 'teepel ' - -''' -bankier.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class bankier(BasicNewsRecipe): - title = u'Bankier.pl' - __author__ = 'teepel ' - language = 'pl' - description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.' # noqa - masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif' - INDEX = 'http://bankier.pl/' - remove_empty_feeds = True - oldest_article = 1 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - simultaneous_downloads = 5 - - keep_only_tags = [] - keep_only_tags.append(dict(name='div', attrs={'align': 'left'})) - - remove_tags = [] - remove_tags.append(dict(name='table', attrs={'cellspacing': '2'})) - remove_tags.append(dict(name='div', attrs={'align': 'center'})) - remove_tags.append(dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'})) - - feeds = [ - (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'), - (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'), - (u'Firma', u'http://feeds.feedburner.com/bankier-firma'), - (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'), - (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'), - (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'), - ] - - def print_version(self, url): - segment = url.split('.') - urlPart = segment[2] - segments = urlPart.split('-') - urlPart2 = segments[-1] - return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2 diff --git a/recipes/bay_citizen.recipe b/recipes/bay_citizen.recipe deleted file mode 100644 index 5c9aac6450..0000000000 --- a/recipes/bay_citizen.recipe +++ /dev/null @@ -1,46 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheBayCitizen(BasicNewsRecipe): - title = 'The Bay Citizen' - language = 'en' - __author__ = 'noah' - description = 'The Bay Citizen' - publisher = 'The Bay Citizen' - INDEX = u'http://www.baycitizen.org' - category = 'news' - oldest_article = 2 - max_articles_per_feed = 20 - no_stylesheets = True - masthead_url = 'http://media.baycitizen.org/images/layout/logo1.png' - feeds = [('Main Feed', 'http://www.baycitizen.org/feeds/stories/')] - keep_only_tags = [dict(name='div', attrs={'class': 'story'})] - remove_tags = [ - dict(name='div', attrs={'class': 'socialBar'}), - dict(name='div', attrs={'id': 'text-resize'}), - dict(name='div', attrs={'class': 'story relatedContent'}), - dict(name='div', attrs={'id': 'comment_status_loading'}), - ] - - def append_page(self, soup, appendtag, position): - pager = soup.find('a', attrs={'class': 'stry-next'}) - if pager: - nexturl = self.INDEX + pager['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'class': 'body'}) - for it in texttag.findAll(style=True): - del it['style'] - newpos = len(texttag.contents) - self.append_page(soup2, texttag, newpos) - texttag.extract() - appendtag.insert(position, texttag) - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - self.append_page(soup, soup.body, 3) - garbage = soup.findAll(id='story-pagination') - [trash.extract() for trash in garbage] - garbage = soup.findAll('em', 'cont-from-prev') - [trash.extract() for trash in garbage] - return soup diff --git a/recipes/beppe_grillo.recipe b/recipes/beppe_grillo.recipe deleted file mode 100644 index 02d23b4e55..0000000000 --- a/recipes/beppe_grillo.recipe +++ /dev/null @@ -1,16 +0,0 @@ -__license__ = 'GPL v3' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1327747616(BasicNewsRecipe): - title = u'Beppe Grillo' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [(u'Beppe Grillo', u'http://feeds.feedburner.com/beppegrillo/atom')] - description = 'Blog of the famous comedian and politician Beppe Grillo - v1.00 (28, January 2012)' - __author__ = 'faber1971' - - language = 'it' diff --git a/recipes/berliner_zeitung.recipe b/recipes/berliner_zeitung.recipe deleted file mode 100644 index cda1568514..0000000000 --- a/recipes/berliner_zeitung.recipe +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2016, Kovid Goyal - -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class BerlinerZeitung(BasicNewsRecipe): - title = 'Berliner Zeitung' - __author__ = 'Kovid Goyal' - language = 'de' - description = 'Berliner Zeitung RSS' - timefmt = ' [%d.%m.%Y]' - ignore_duplicate_articles = {'title', 'url'} - remove_empty_feeds = True - - # oldest_article = 7.0 - no_stylesheets = True - remove_javascript = True - use_embedded_content = False - publication_type = 'newspaper' - - keep_only_tags = [ - classes('dm_article_body dm_article_header'), - ] - remove_tags = [ - classes('dm_article_share'), - ] - - feeds = [x.split() for x in [ - 'Berlin http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699382-asYahooFeed.xml', - 'Brandenburg http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699570-asYahooFeed.xml', - 'Politik http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699614-asYahooFeed.xml', - 'Wirtschaft http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699644-asYahooFeed.xml', - 'Sport http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23699874-asYahooFeed.xml', - 'Kultur http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700020-asYahooFeed.xml', - 'Panorama http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700178-asYahooFeed.xml', - 'Wissen http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700222-asYahooFeed.xml', - 'Digital http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700594-asYahooFeed.xml', - 'Ratgeber http://www.berliner-zeitung.de/blueprint/servlet/xml/berliner-zeitung/23700190-asYahooFeed.xml', - ]] diff --git a/recipes/berlingske_dk.recipe b/recipes/berlingske_dk.recipe deleted file mode 100644 index 2900eee704..0000000000 --- a/recipes/berlingske_dk.recipe +++ /dev/null @@ -1,49 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -berlingske.dk -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Berlingske_dk(BasicNewsRecipe): - title = 'Berlingske Tidende' - __author__ = 'Darko Miletic' - description = 'News from Denmark' - publisher = 'berlingske.dk' - category = 'news, politics, Denmark' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - use_embedded_content = False - remove_javascript = True - publication_type = 'newspaper' - encoding = 'utf8' - language = 'da' - auto_cleanup = True - extra_css = ''' - .manchet {color:#888888;} - .dateline {font-size: x-small; color:#444444;} - .manchet,.dateline { font-family: Cambria,Georgia,Times,"Times New Roman",serif } - .body {font-family: Arial,Helvetica,sans-serif } - ''' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [ - - (u'Breaking news', u'http://www.b.dk/breaking/rss'), - (u'Seneste nyt', u'http://www.b.dk/seneste/rss'), - (u'Topnyheder', u'http://www.b.dk/top/rss'), - (u'Danmark', u'http://www.b.dk/danmark/seneste/rss'), - (u'Verden', u'http://www.b.dk/verden/seneste/rss'), - (u'Klima', u'http://www.b.dk/klima/seneste/rss'), - (u'Debat', u'http://www.b.dk/debat/seneste/rss'), - (u'Koebenhavn', u'http://www.b.dk/koebenhavn/seneste/rss'), - (u'Politik', u'http://www.b.dk/politik/seneste/rss'), - (u'Kultur', u'http://www.b.dk/kultur/seneste/rss') - ] diff --git a/recipes/bighollywood.recipe b/recipes/bighollywood.recipe deleted file mode 100644 index 0f0c06760d..0000000000 --- a/recipes/bighollywood.recipe +++ /dev/null @@ -1,60 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Darko Miletic ' -''' -bighollywood.breitbart.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BigHollywood(BasicNewsRecipe): - title = 'Big Hollywood' - __author__ = 'Darko Miletic' - description = 'News and articles from the media world' - publisher = 'Big Hollywood' - category = 'news, media, art, literature, movies, politics, USA, Hollywood' - oldest_article = 7 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - publication_type = 'blog' - extra_css = """ - body{font-family: Arial,sans-serif } - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [dict(attrs={'class': 'postcontent'})] - remove_tags = [ - dict(name=['meta', 'link', 'link', 'iframe', 'embed', 'object']), dict( - name='p', attrs={'class': ['post_meta_links', 'postfooter']}) - ] - remove_attributes = ['original', 'onclick'] - - feeds = [(u'Articles', u'http://bighollywood.breitbart.com/feed/')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - if limg['src'].endswith('BlogPrintButton.png'): - limg.extract() - item.name = 'div' - item.attrs = [] - else: - str = self.tag_to_string(item) - item.replaceWith(str) - for item in soup.findAll('img', alt=False): - item['alt'] = 'image' - return soup diff --git a/recipes/birgun_gazetesi.recipe b/recipes/birgun_gazetesi.recipe deleted file mode 100644 index 451d0dda5a..0000000000 --- a/recipes/birgun_gazetesi.recipe +++ /dev/null @@ -1,45 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Birgun (BasicNewsRecipe): - - title = u'Birgün Gazetesi' - __author__ = u'Osman Kaysan' - oldest_article = 7 - max_articles_per_feed = 150 - use_embedded_content = False - description = 'Birgun gazatesi haberleri, kose yazarlari' - publisher = 'Birgün' - category = 'news,haberler,turkce,gazete,birgun' - language = 'tr' - no_stylesheets = True - publication_type = 'newspaper' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True, 'remove_paragraph_spacing': True, - } - - cover_img_url = 'http://www.birgun.net/i/birgun.png' - masthead_url = 'http://www.birgun.net/i/birgun.png' - - remove_attributes = ['width', 'height'] - - remove_tags_before = dict(name='h2', attrs={'class': 'storyHeadline'}) - remove_tags_after = dict(name='tr', attrs={'valign': 'top'}) - remove_tags = [dict(name='div', attrs={'id': 'byLine'}), dict(name='div', attrs={'class': 'toollinks'}), dict(name='div', attrs={ - 'class': 'main-lead'}), dict(name='div', attrs={'class': 'addthis_toolbox addthis_default_style'}), dict(name='a', attrs={'class': 'addthis_button'})] - - remove_empty_feeds = True - - feeds = [ - - (u'Güncel', u'http://www.birgun.net/actuels.xml'), - (u'Köşe Yazarları', u'http://www.birgun.net/writer.xml'), - (u'Politika', u'http://www.birgun.net/politics.xml'), - (u'Ekonomi', u'http://www.birgun.net/economic.xml'), - (u'Çalışma Yaşamı', u'http://www.birgun.net/workers.xml'), - (u'Dünya', u'http://www.birgun.net/worlds.xml'), - (u'Yaşam', u'http://www.birgun.net/lifes.xml') - ] diff --git a/recipes/birmingham_post.recipe b/recipes/birmingham_post.recipe deleted file mode 100644 index e87b215def..0000000000 --- a/recipes/birmingham_post.recipe +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import print_function - -import re - -import mechanize -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1306097511(BasicNewsRecipe): - title = u'Birmingham post' - description = 'Author D.Asbury. News for Birmingham UK' - __author__ = 'Dave Asbury' - cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' - oldest_article = 2 - max_articles_per_feed = 20 - linearize_tables = True - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - auto_cleanup = True - language = 'en_GB' - compress_news_images = True - cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' - - masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif' - - def get_cover_url(self): - soup = self.index_to_soup('http://www.birminghampost.net') - # look for the block containing the sun button and url - cov = soup.find(attrs={'height': re.compile( - '3'), 'alt': re.compile('Post')}) - print() - print('%%%%%%%%%%%%%%%', cov) - print() - cov2 = str(cov['src']) - print('88888888 ', cov2, ' 888888888888') - - # cover_url=cov2 - # return cover_url - br = mechanize.Browser() - br.set_handle_redirect(False) - try: - br.open_novisit(cov2) - cover_url = cov2 - except: - cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' - return cover_url - - feeds = [ - (u'West Mids. News', - u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'), - (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'), - (u'Sports', u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'), - (u'Bloggs & Comments', u'http://www.birminghampost.net/comment/rss.xml') - - ] diff --git a/recipes/bitacora.recipe b/recipes/bitacora.recipe deleted file mode 100644 index 7de43e0b67..0000000000 --- a/recipes/bitacora.recipe +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = '2010, Gustavo Azambuja ' -''' -bitacora.com.uy -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class General(BasicNewsRecipe): - title = 'bitacora.com.uy' - __author__ = 'Gustavo Azambuja' - description = 'Noticias de Uruguay' - language = 'es_UY' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 5 - encoding = 'iso-8859-1' - remove_javascript = True - no_stylesheets = True - - oldest_article = 2 - max_articles_per_feed = 100 - keep_only_tags = [dict(id=['txt'])] - remove_tags = [ - dict(name='div', attrs={'class': 'tablafoot'}), - dict(name=['object', 'h4']), - dict(name=['object', 'link']) - ] - - remove_attributes = ['width', 'height', 'style', 'font', 'color'] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - feeds = [ - (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15') - ] - - def get_cover_url(self): - cover_url = None - index = 'http://www.bitacora.com.uy' - soup = self.index_to_soup(index) - link_item = soup.find('img', attrs={'class': 'imgtapa'}) - if link_item: - cover_url = "http://www.bitacora.com.uy/" + link_item['src'] - return cover_url - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/biz_portal.recipe b/recipes/biz_portal.recipe deleted file mode 100644 index 32cecc4b72..0000000000 --- a/recipes/biz_portal.recipe +++ /dev/null @@ -1,38 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1283848012(BasicNewsRecipe): - description = 'This is a recipe of BizPortal.co.il.' - cover_url = 'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg' - title = u'BizPortal' - language = 'he' - __author__ = 'marbs' - extra_css = 'img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }' # noqa - simultaneous_downloads = 5 - remove_javascript = True - timefmt = '[%a, %d %b, %Y]' - remove_empty_feeds = True - oldest_article = 1 - max_articles_per_feed = 100 - remove_attributes = ['width'] - simultaneous_downloads = 5 - remove_tags = [dict(name='img', attrs={'scr': ['images/bizlogo_nl.gif']})] - max_articles_per_feed = 100 - - feeds = [(u'חדשות שוק ההון', u'http://www.bizportal.co.il/shukhahon/messRssUTF2.xml'), - (u'חדשות וול סטריט בעברית', - u'http://www.bizportal.co.il/shukhahon/images/bizportal.jpg'), - (u'שיווק ופרסום', u'http://www.bizportal.co.il/shukhahon/messRssUTF145.xml'), - (u'משפט', u'http://www.bizportal.co.il/shukhahon/messRssUTF3.xml'), - (u'ניתוח טכני', u'http://www.bizportal.co.il/shukhahon/messRssUTF5.xml'), - (u'דיני עבודה ושכר', u'http://www.bizportal.co.il/shukhahon/messRssUTF6.xml'), - (u'מיסוי', u'http://www.bizportal.co.il/shukhahon/messRssUTF7.xml'), - (u'טאבו', u'http://www.bizportal.co.il/shukhahon/messRssUTF8.xml'), - (u'נדל"ן', u'http://www.bizportal.co.il/shukhahon/messRssUTF160.xml'), - ] - - def print_version(self, url): - split1 = url.split("=") - print_url = 'http://www.bizportal.co.il/web/webnew/shukhahon/biznews02print.shtml?mid=' + \ - split1[1] - return print_url diff --git a/recipes/boortz.recipe b/recipes/boortz.recipe deleted file mode 100644 index 2c131382ef..0000000000 --- a/recipes/boortz.recipe +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Tony Stegall' -__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com' -__version__ = '1.04' -__date__ = '27, September 2010' -__docformat__ = 'restructuredtext en' - - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1282101454(BasicNewsRecipe): - title = 'Nealz Nuze' - language = 'en' - __author__ = 'TonytheBookworm' - description = 'Neal Boortz Show Radio Notes' - publisher = 'Neal Boortz' - category = 'news, politics, USA, talkshow' - oldest_article = 1 - max_articles_per_feed = 100 - - no_stylesheets = True - remove_javascript = True - use_embedded_content = True - masthead_url = 'http://boortz.com/images/nuze_logo.gif' - conversion_options = {'linearize_tables': True} - feeds = [ - ('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml') - - ] diff --git a/recipes/borse_online.recipe b/recipes/borse_online.recipe deleted file mode 100644 index e28a49104b..0000000000 --- a/recipes/borse_online.recipe +++ /dev/null @@ -1,34 +0,0 @@ -# vim:fileencoding=utf-8 -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe1303841067(BasicNewsRecipe): - - title = u'Börse-online' - __author__ = 'schuster, Armin Geller' # AGE upd 2013-11-29 - oldest_article = 1 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - language = 'de' - remove_javascript = True - remove_empty_feeds = True - ignore_duplicate_articles = {'title', 'url'} - encoding = 'utf-8' - timefmt = ' [%a, %d %b %Y]' - - cover_url = 'http://www.wirtschaftsmedien-shop.de/s/media/coverimages/7576_2013107.jpg' - masthead_url = 'http://upload.wikimedia.org/wikipedia/de/5/56/B%C3%B6rse_Online_Logo.svg' - - feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss'), - (u'Märkte', u'http://www.boerse-online.de/rss/maerkte'), - (u'Chartanalyse', u'http://www.boerse-online.de/rss/maerkte/chartanalyse'), - (u'Aktien', u'http://www.boerse-online.de/rss/aktie'), - (u'Aktien-Chartanalyse', - u'http://www.boerse-online.de/rss/aktie/chartanalyse'), - (u'zertifikate', u'http://www.boerse-online.de/rss/zertifikat') - ] - - def print_version(self, url): - s1, s2 = url.rsplit('/', 1) - return 'http://www.boerse-online.de/nachrichten/drucken/' + s2 diff --git a/recipes/boxis.recipe b/recipes/boxis.recipe deleted file mode 100644 index 5cd1e1db56..0000000000 --- a/recipes/boxis.recipe +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import AutomaticNewsRecipe - - -class BasicUserRecipe1501590114(AutomaticNewsRecipe): - title = 'Boxis' - oldest_article = 240 - max_articles_per_feed = 100 - auto_cleanup = True - language = 'sc' - __author__ = 'tzium' - - feeds = [ - ('Boxis', 'http://www.boxis.it/sc/feed/'), - ] diff --git a/recipes/brand_eins.recipe b/recipes/brand_eins.recipe deleted file mode 100644 index f4659ce1b2..0000000000 --- a/recipes/brand_eins.recipe +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import unicode_literals - -__license__ = 'GPL v3' -__version__ = '0.2' - -''' -brand eins.de -''' -from collections import OrderedDict - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BrandEins(BasicNewsRecipe): - - title = u'brand eins' - __author__ = 'Nikolas Mangold-Takao, Thomas Schlenkhoff' - language = 'de' - description = u'brand eins beschreibt den momentanen Wandel in Wirtschaft und Gesellschaft.' - publisher = u'brand eins Verlag GmbH & Co. oHG' - category = 'politics, business, wirtschaft, Germany' - - PREFIX = 'http://www.brandeins.de/' - INDEX = PREFIX + 'archiv/listeansicht.html' - - use_embedded_content = False - resolve_internal_links = True - - no_stylesheets = True - needs_subscription = False - - delay = 1 - summary_length = 200 - simultaneous_downloads = 5 - remove_javascript = True - - keep_only_tags = dict(name='div', attrs={'id': 'content'}) - - # remove share image from articles - remove_tags = [dict(name='div', attrs={'id': 'oms_gpt_billboard'}), - dict(name='div', attrs={'id': 'oms_gpt_rectangle'}), - dict(name='h3', attrs={'class': 'sharing-headline'}), - dict(name='div', attrs={'class': 'sharing-links'}), - dict(name='aside', attrs={'class': 'articleAside'})] - - remove_tags_before = dict( - name='div', attrs={'class': 'innerContent typeArticle'}) - remove_tags_after = dict(name='div', attrs={'id': 'socialshareprivacy'}) - - extra_css = ''' - body, p {text-align: left;} - .headline {font-size: x-large;} - h2 {font-size: medium;} - h1 {font-size: large;} - em.Bold {font-weight:bold;font-style:normal;} - em.Italic {font-style:italic;} - ''' - - def parse_index(self): - issue = "" - - soup = self.index_to_soup(self.INDEX) - issue_list = soup.findAll('div', attrs={'class': 'details'}) - - issue_map = {} - i = 0 - for entry in issue_list: - title = self.tag_to_string(entry.find( - 'h3', attrs={'class': 'like-h1'})) - issue_string = self.tag_to_string( - entry.find('span', attrs={'class': 'meta'})) - year = issue_string[8:] - month = issue_string[5:-5] - yyyymm = "{}{}".format(year, month) - link = entry.findAll('a')[0] - issue_map[yyyymm] = link.get('href') - self.log('- ', year, month, title, link.get('href')) - - # Issue 1 (most recent) has only few articles online, - # Issue 2 and 3 (2nd and 3rd recent) is not completely online. - # Issue 4 (4th recent) is completely online, hence i == 3 - - if issue == "" and i == 3: - issue = yyyymm - i += 1 - - url = 'http://brandeins.de/' + issue_map[issue] - self.log('Issue to get: ', issue, title, url) - self.issue_url = url # save to extract cover - - return self.parse_issue(url) - - def parse_issue(self, url): - soup = self.index_to_soup(url) - feeds = OrderedDict() - - for item in soup.findAll(attrs={'class': lambda x: 'ihv_item' in (x or '').split()}): - a = item.findParent('a', href=True) - if a is None: - continue - url = self.PREFIX + a['href'] - title = self.tag_to_string(item.find(attrs={'class': 'ihv_title'})) - sec = self.tag_to_string( - item.find(attrs={'class': 'ihv_page_category'}).findAll('span')[-1]) - if sec not in feeds: - feeds[sec] = [] - desc = '' - for p in item.findAll('p'): - desc += self.tag_to_string(p) - feeds[sec].append( - {'title': title, 'url': url, 'description': desc}) - self.log('Found article:', title, 'at', url) - - return [(st, articles) for st, articles in feeds.items() if articles] - - def get_cover_url(self): - # the index does not contain a usable cover, but the 'Welt in - # Zahlen'-article contains it - cover_article = "{}{}".format( - self.issue_url, 'die-welt-in-zahlen.html') - self.log('Cover article URL: %s' % cover_article) - soup = self.index_to_soup(cover_article) - img = soup.find('section', 'asideSection no-content').find('img') - self.log('Found cover image url: %s' % img['src']) - return (self.PREFIX + img['src']) - - def preprocess_raw_html(self, raw_html, url): - return raw_html.replace('

• ', '

') diff --git a/recipes/brasil_de_fato.recipe b/recipes/brasil_de_fato.recipe deleted file mode 100644 index 56337c3b91..0000000000 --- a/recipes/brasil_de_fato.recipe +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BrasilDeFato(BasicNewsRecipe): - news = True - title = u'Brasil de Fato' - __author__ = 'Alex Mitrani' - description = u'Uma visão popular do Brasil e do mundo.' - publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO' - category = 'news, politics, Brazil, rss, Portuguese' - oldest_article = 10 - max_articles_per_feed = 100 - summary_length = 1000 - language = 'pt_BR' - - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg' - keep_only_tags = [dict(name='div', attrs={'id': 'main'})] - remove_tags = [dict(name='div', attrs={'class': 'links'})] - remove_tags_after = [dict(name='div', attrs={'class': 'links'})] - - feeds = [ - (u'Nacional', u'http://www.brasildefato.com.br/rss_nacional'), - (u'Internacional', u'http://www.brasildefato.com.br/rss_internacional'), - (u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista'), - (u'Cultura', u'http://www.brasildefato.com.br/rss_cultura'), - (u'Análise', u'http://www.brasildefato.com.br/rss_analise') - ] diff --git a/recipes/brecha.recipe b/recipes/brecha.recipe deleted file mode 100644 index 0986017c79..0000000000 --- a/recipes/brecha.recipe +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -__license__ = 'GPL v3' -__copyright__ = '2012, Darko Miletic ' -''' -www.brecha.com.uy -''' - -try: - from urllib.parse import quote, urlencode -except ImportError: - from urllib import quote, urlencode -from calibre.web.feeds.news import BasicNewsRecipe - - -class Brecha(BasicNewsRecipe): - title = 'Brecha Digital' - __author__ = 'Darko Miletic' - description = 'Brecha , Cultura ,Sociales , Separatas, Lupas, Vueltas de Montevideo y toda la infomacion que caracteriza a este semanario' - publisher = 'Brecha' - category = 'brecha, digital, prensa, uruguay, semanario, sociedad, politica, cultura' - oldest_article = 7 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'es_UY' - remove_empty_feeds = True - publication_type = 'magazine' - auto_cleanup = True - needs_subscription = 'optional' - masthead_url = 'http://www.brecha.com.uy/templates/ja_nex/themes/orange/images/logo.png' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - img{margin-bottom: 0.4em; display:block} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.brecha.com.uy/index.php/acceder-miembros') - if self.username is not None and self.password is not None: - data = urlencode({'task': 'login', 'view': 'register', 'username': self.username, 'password': self.password - }) - br.open( - 'http://www.brecha.com.uy/index.php/index.php?option=com_osemsc&controller=register', data) - return br - - remove_tags = [ - dict(name=['meta', 'link']), - dict(name='div', attrs={'id': 'js_ja'}), - dict(name='ul', attrs={'class': 'actions'}) - ] - remove_attributes = ['lang', 'border'] - - feeds = [ - (u'Politica', u'http://www.brecha.com.uy/index.php/politica-uruguaya?format=feed&type=rss'), - (u'Mundo', u'http://www.brecha.com.uy/index.php/mundo?format=feed&type=rss'), - (u'Mapamundi', u'http://www.brecha.com.uy/index.php/mundo/mapamundi?format=feed&type=rss'), - (u'Cultura', u'http://www.brecha.com.uy/index.php/cultura?format=feed&type=rss'), - (u'Vueltas de Montevideo', - u'http://www.brecha.com.uy/index.php/cultura/vueltas-de-montevideo?format=feed&type=rss'), - (u'Secos y Mojados', u'http://www.brecha.com.uy/index.php/cultura/secos-y-mojados?format=feed&type=rss'), - (u'Literarias', u'http://www.brecha.com.uy/index.php/cultura/literarias?format=feed&type=rss'), - (u'Sociedad', u'http://www.brecha.com.uy/index.php/sociedad?format=feed&type=rss'), - (u'Especiales', u'http://www.brecha.com.uy/index.php/especiales?format=feed&type=rss'), - (u'Contratapa', u'http://www.brecha.com.uy/index.php/contratapa?format=feed&type=rss') - ] - - def print_version(self, url): - return url + '?tmpl=component&print=1&layout=default&page=' - - def get_cover_url(self): - soup = self.index_to_soup('http://www.brecha.com.uy/index.php') - for image in soup.findAll('img', alt=True): - if image['alt'].startswith('Tapa '): - return 'http://www.brecha.com.uy' + quote(image['src']) - return None diff --git a/recipes/brhat.recipe b/recipes/brhat.recipe deleted file mode 100644 index b2f72a7de1..0000000000 --- a/recipes/brhat.recipe +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from calibre.web.feeds.news import BasicNewsRecipe - - -class Brhat(BasicNewsRecipe): - title = 'Brhat' - __author__ = 'Vishvas Vasuki' - language = 'en_IN' - oldest_article = 365 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [ - ('Main', 'https://brhat.in/feed/'), - ] diff --git a/recipes/bsi_news.recipe b/recipes/bsi_news.recipe deleted file mode 100644 index 49ff39d7ab..0000000000 --- a/recipes/bsi_news.recipe +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - - -class germanyBSI(BasicNewsRecipe): - # Title of the Recipe - # title = 'News des Bundesamt für Sicherheit in der Informationstechnik' - title = 'BSI News - DE' - cover_url = 'https://www.bsi.bund.de/SiteGlobals/Frontend/Images/BSI/logo.png' - # Author - __author__ = 'Volker Heggemann, VoHe' - # oldest article to download (in days) ---- can be edit by user - oldest_article = 7 - # describes itself, ---- can be edit by user - max_articles_per_feed = 100 - # speed up the download on fast computers be careful (I test max.20) - # ---- can be edit by user - simultaneous_downloads = 10 - # description, some Reader show this in titlepage - description = u'News from BSI' - # Who published the content? - publisher = u'Newsfeeds des BSI' - # What is the content of? - category = u'Sie erfahren, wenn neue Nachrichten auf der Internetseite des BSI veröffentlicht werden' - # describes itself, ---- can be edit by user - use_embedded_content = False - # describes itself, ---- can be edit by user - language = 'de' - # encoding of content. e.g. utf-8, None, ... - # ---- can be edit by user - encoding = None # 'utf-8' doesn't work here - # Removes javascript- why keep this, we only want static content - remove_javascript = True - # Removes empty feeds - why keep them!? - remove_empty_feeds = True - - # remove the rubbish (in ebook) - auto_cleanup = True - # now the content description and URL follows - # feel free to add, wipe out what you need ---- can be edit by user - # - # some of this are double - # - # - # Make some tests, may you first comment all of them out, and step by step you add what you'll need? - # - - feeds = [ - ('BSI - Germany - Sicherheitshinweise des Buerger-CERT', - 'https://www.bsi-fuer-buerger.de/SiteGlobals/Functions/RSSFeed/RSSNewsfessBSIFB/RSSNewsfeed_BuergerCERT.xml' - ), - ('BSI - Germany - Aktuelle Informationen BSI f\xfcr B\xfcrger', - 'https://www.bsi-fuer-buerger.de/SiteGlobals/Functions/RSSFeed/RSSNewsfessBSIFB/RSSNewsfeed_Buerger_aktuelle_Informationen.xml' - ), - ('Kurzinformationen des CERT-Bund zu Sicherheitsl\xfccken und Schwachstellen in IT-Systemen', - 'https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed_WID.xml' - ), - ('BSI - Germany - RSS-Newsfeed (Presse-, Kurzmitteilungen und Veranstaltungshinweise)', - 'https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed.xml' - ), - ] diff --git a/recipes/buchreport.recipe b/recipes/buchreport.recipe deleted file mode 100644 index 62250b69e6..0000000000 --- a/recipes/buchreport.recipe +++ /dev/null @@ -1,45 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - -'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.''' - - -class Buchreport(BasicNewsRecipe): - __author__ = 'a.peter' - __copyright__ = 'a.peter' - __license__ = 'GPL v3' - description = 'Buchreport' - version = 4 - title = u'Buchreport' - timefmt = ' [%d.%m.%Y]' - encoding = 'cp1252' - language = 'de' - - extra_css = 'body { margin-left: 0.00em; margin-right: 0.00em; } \ - article, articledate, articledescription { text-align: left; } \ - h1 { text-align: left; font-size: 140%; font-weight: bold; } \ - h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } \ - h3 { text-align: left; font-size: 100%; font-weight: regular; font-style: italic; } \ - h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }' - - oldest_article = 7.0 - no_stylesheets = True - remove_javascript = True - use_embedded_content = False - publication_type = 'newspaper' - - remove_tags_before = dict(name='h2') - remove_tags_after = [ - dict(name='div', attrs={'style': ["padding-top:10px;clear:both"]}) - ] - remove_tags = [ - dict(name='div', attrs={'style': ["padding-top:10px;clear:both"]}), - dict(name='iframe'), - dict(name='img') - ] - - feeds = [ - (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100') - ] - - def get_masthead_url(self): - return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg' diff --git a/recipes/buckmasters.recipe b/recipes/buckmasters.recipe deleted file mode 100644 index b46d1cd154..0000000000 --- a/recipes/buckmasters.recipe +++ /dev/null @@ -1,49 +0,0 @@ -from calibre.ebooks.BeautifulSoup import Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class AdvancedUserRecipe1282101454(BasicNewsRecipe): - title = 'BuckMasters In The Kitchen' - language = 'en' - __author__ = 'TonytheBookworm & Starson17' - description = 'Learn how to cook all those outdoor varments' - publisher = 'BuckMasters.com' - category = 'food,cooking,recipes' - oldest_article = 365 - max_articles_per_feed = 100 - conversion_options = {'linearize_tables': True} - masthead_url = 'http://www.buckmasters.com/Portals/_default/Skins/BM_10/images/header_bg.jpg' - keep_only_tags = [ - dict(name='table', attrs={'class': ['containermaster_black']}) - ] - remove_tags_after = [dict(name='div', attrs={'align': ['left']})] - feeds = [ - ('Recipes', 'http://www.buckmasters.com/DesktopModules/DnnForge%20-%20NewsArticles/RSS.aspx?TabID=292&ModuleID=658&MaxCount=25'), - ] - - def preprocess_html(self, soup): - item = soup.find('a', attrs={'class': ['MenuTopSelected']}) - if item: - item.parent.extract() - for img_tag in soup.findAll('img'): - parent_tag = img_tag.parent - if parent_tag.name == 'a': - ntag = new_tag(soup, 'p') - ntag.insert(0, img_tag) - parent_tag.replaceWith(ntag) - elif parent_tag.name == 'p': - if not self.tag_to_string(parent_tag) == '': - new_div = new_tag(soup, 'div') - ntag = new_tag(soup, 'p') - ntag.insert(0, img_tag) - parent_tag.replaceWith(new_div) - new_div.insert(0, ntag) - new_div.insert(1, parent_tag) - return soup diff --git a/recipes/buenosaireseconomico.recipe b/recipes/buenosaireseconomico.recipe deleted file mode 100644 index 017329d1b2..0000000000 --- a/recipes/buenosaireseconomico.recipe +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009-2016, Darko Miletic ' -''' -www.diariobae.com -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class BsAsEconomico(BasicNewsRecipe): - title = 'Buenos Aires Economico' - __author__ = 'Darko Miletic' - description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior. El pozo estaria aportando en periodos breves un volumen equivalente a 800m3 diarios. Pero todavia deben efectuarse otras perforaciones adicionales.' # noqa - publisher = 'Diario BAE' - category = 'news, politics, economy, Argentina' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - language = 'es_AR' - masthead_url = 'http://static.cronica.com.ar/FileAccessHandler.ashx?code=635959869637084622' - remove_empty_feeds = True - publication_type = 'newspaper' - extra_css = """ - body{font-family: Georgia,"Times New Roman",Times,serif} - img{display: block; margin-top: 1em} - """ - conversion_options = { - 'comment' : description, - 'tags' : category, - 'publisher': publisher, - 'language' : language - } - - keep_only_tags = [dict(name='div', attrs={'class':'post'})] - remove_tags = [ - dict(name=['meta', 'base', 'iframe', 'link', 'lang']) - ,dict(attrs={'class':'pdfprnt-bottom-right'}) - ] - - feeds = [(u'Articles', u'http://www.diariobae.com/feed/getfeed')] - - def get_cover_url(self): - cover = None - soup = self.index_to_soup('http://www.diariobae.com/') - tag = soup.find('a', rel='lightbox[tapa]', href=True) - if tag: - cover = tag['href'] - return cover diff --git a/recipes/buffalo_news.recipe b/recipes/buffalo_news.recipe deleted file mode 100644 index 3d5a0fc3ec..0000000000 --- a/recipes/buffalo_news.recipe +++ /dev/null @@ -1,49 +0,0 @@ -__license__ = 'GPL v3' -__author__ = 'Todd Chapman' -__copyright__ = 'Todd Chapman' -__version__ = 'v0.2' -__date__ = '2 March 2011' - -''' -http://www.buffalonews.com/RSS/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BuffaloNews(BasicNewsRecipe): - title = u'Buffalo News' - oldest_article = 2 - language = 'en' - __author__ = 'ChappyOnIce, Krittika Goyal' - max_articles_per_feed = 20 - encoding = 'utf-8' - masthead_url = 'http://www.buffalonews.com/buffalonews/skins/buffalonews/images/masthead/the_buffalo_news_logo.png' - auto_cleanup = True - remove_empty_feeds = True - - feeds = [ - (u'City of Buffalo', u'http://www.buffalonews.com/city/communities/buffalo/?widget=rssfeed&view=feed&contentId=77944'), - (u'Southern Erie County', - u'http://www.buffalonews.com/city/communities/southern-erie/?widget=rssfeed&view=feed&contentId=77944'), - (u'Eastern Erie County', u'http://www.buffalonews.com/city/communities/eastern-erie/?widget=rssfeed&view=feed&contentId=77944'), - (u'Southern Tier', u'http://www.buffalonews.com/city/communities/southern-tier/?widget=rssfeed&view=feed&contentId=77944'), - (u'Niagara County', u'http://www.buffalonews.com/city/communities/niagara-county/?widget=rssfeed&view=feed&contentId=77944'), - (u'Business', u'http://www.buffalonews.com/business/?widget=rssfeed&view=feed&contentId=77944'), - (u'MoneySmart', u'http://www.buffalonews.com/business/moneysmart/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bills & NFL', u'http://www.buffalonews.com/sports/bills-nfl/?widget=rssfeed&view=feed&contentId=77944'), - (u'Sabres & NHL', u'http://www.buffalonews.com/sports/sabres-nhl/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bob DiCesare', u'http://www.buffalonews.com/sports/columns/bob-dicesare/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bucky Gleason', u'http://www.buffalonews.com/sports/columns/bucky-gleason/?widget=rssfeed&view=feed&contentId=77944'), - (u'Mark Gaughan', u'http://www.buffalonews.com/sports/bills-nfl/inside-the-nfl/?widget=rssfeed&view=feed&contentId=77944'), - (u'Mike Harrington', u'http://www.buffalonews.com/sports/columns/mike-harrington/?widget=rssfeed&view=feed&contentId=77944'), - (u'Jerry Sullivan', u'http://www.buffalonews.com/sports/columns/jerry-sullivan/?widget=rssfeed&view=feed&contentId=77944'), - (u'Other Sports Columns', u'http://www.buffalonews.com/sports/columns/other-sports-columns/?widget=rssfeed&view=feed&contentId=77944'), - (u'Life', u'http://www.buffalonews.com/life/?widget=rssfeed&view=feed&contentId=77944'), - (u'Bruce Andriatch', u'http://www.buffalonews.com/city/columns/bruce-andriatch/?widget=rssfeed&view=feed&contentId=77944'), - (u'Donn Esmonde', u'http://www.buffalonews.com/city/columns/donn-esmonde/?widget=rssfeed&view=feed&contentId=77944'), - (u'Rod Watson', u'http://www.buffalonews.com/city/columns/rod-watson/?widget=rssfeed&view=feed&contentId=77944'), - (u'Entertainment', u'http://www.buffalonews.com/entertainment/?widget=rssfeed&view=feed&contentId=77944'), - (u'Off Main Street', u'http://www.buffalonews.com/city/columns/off-main-street/?widget=rssfeed&view=feed&contentId=77944'), - (u'Editorials', u'http://www.buffalonews.com/editorial-page/buffalo-news-editorials/?widget=rssfeed&view=feed&contentId=77944') - ] diff --git a/recipes/businessworldin.recipe b/recipes/businessworldin.recipe deleted file mode 100644 index f0e637dfb4..0000000000 --- a/recipes/businessworldin.recipe +++ /dev/null @@ -1,36 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2010, Darko Miletic ' -''' -www.businessworld.in -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BusinessWorldMagazine(BasicNewsRecipe): - title = 'Business World Magazine' - __author__ = 'Kovid Goyal' - description = 'News from India' - category = 'news, politics, finances, India, Asia' - no_stylesheets = True - encoding = 'utf-8' - language = 'en_IN' - oldest_article = 2 - - keep_only_tags = [ - dict(attrs={'class': ['main-article']}), - ] - remove_tags = [ - dict(id='video_n_ad_div'), - dict(attrs={'class': ['meta-tools', 'social-article']}), - ] - remove_tags_after = dict(attrs={'class': 'social-article'}) - - feeds = ['http://www.businessworld.in/rss/all-article.xml'] - - def preprocess_html(self, soup): - for img in soup.findAll('img', attrs={'data-original': True}): - img['src'] = img['data-original'] - for ins in soup.findAll(attrs={'class': 'adsbygoogle'}): - ins.parent.extract() - return soup diff --git a/recipes/cafcaf_dergisi.recipe b/recipes/cafcaf_dergisi.recipe deleted file mode 100644 index 4864a277a8..0000000000 --- a/recipes/cafcaf_dergisi.recipe +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1325259641(BasicNewsRecipe): - language = 'tr' - __author__ = 'asalet_r' - title = u'CafCaf Dergisi' - oldest_article = 7 - max_articles_per_feed = 20 - auto_cleanup = True - - feeds = [(u'CafCaf', u'http://www.cafcafdergisi.net/feed/rss/')] diff --git a/recipes/camera_di_commercio_di_bari.recipe b/recipes/camera_di_commercio_di_bari.recipe deleted file mode 100644 index f81bbea2a3..0000000000 --- a/recipes/camera_di_commercio_di_bari.recipe +++ /dev/null @@ -1,20 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1331729727(BasicNewsRecipe): - title = u'Camera di Commercio di Bari' - oldest_article = 7 - __author__ = 'faber1971' - description = 'News from the Chamber of Commerce of Bari' - language = 'it' - max_articles_per_feed = 100 - auto_cleanup = True - masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png' - feeds = [(u'Camera di Commercio di Bari', - u'http://feed43.com/4715147488845101.xml')] - - -__license__ = 'GPL v3' -__copyright__ = '2012, faber1971' -__version__ = 'v1.00' -__date__ = '17, April 2012' diff --git a/recipes/capes_n_babes.recipe b/recipes/capes_n_babes.recipe deleted file mode 100644 index 3b18f52a33..0000000000 --- a/recipes/capes_n_babes.recipe +++ /dev/null @@ -1,12 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class CapesnBabesRecipe(BasicNewsRecipe): - title = u'Capes n Babes' - language = 'en' - description = 'The Capes n Babes comic Blog' - __author__ = 'skyhawker' - oldest_article = 31 - max_articles_per_feed = 100 - use_embedded_content = True - feeds = [(u'Capes & Babes', u'feed://www.capesnbabes.com/feed/')] diff --git a/recipes/capital.recipe b/recipes/capital.recipe deleted file mode 100644 index 416821cb7f..0000000000 --- a/recipes/capital.recipe +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -capital.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Capital(BasicNewsRecipe): - title = 'Capital' - __author__ = u'Silviu Cotoar\u0103' - description = u'\u0218tiri din Rom\u00e2nia' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri,Romania' - encoding = 'utf-8' - remove_javascript = True - publisher = 'Capital' - cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [dict(name='div', attrs={'class': 'single one_article'}) - ] - - remove_tags = [dict(name='div', attrs={'class': 'single_details'}), dict(name='div', attrs={'class': 'tx-addoceansbanners-pi1'}) - ] - - feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html')] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/capital_de.recipe b/recipes/capital_de.recipe deleted file mode 100644 index 28c758efe1..0000000000 --- a/recipes/capital_de.recipe +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -''' -capital.de -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1305470859(BasicNewsRecipe): - title = 'Capital.de' - __author__ = 'schuster' - description = 'RSS-Feed von Capital.de' - publisher = 'Gruner+Jahr GmbH & Co KG' - language = 'de' - - oldest_article = 14 - max_articles_per_feed = 35 - no_stylesheets = True - remove_javascript = True - use_embedded_content = False - - conversion_options = {'smarten_punctuation': True, - 'publisher': publisher} - - cover_source = 'http://shop.capital.de/abos/capital/' - masthead_url = 'http://www.capital.de/files/capital/layout/logo.png' - - feeds = [ - ('Capital.de', 'http://www.capital.de/partner-feeds/rss.xml') - ] - - keep_only_tags = [ - dict(name='div', attrs={ - 'class': 'grid_8 alpha omega layout_full block'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': 'article_header'}), - dict(name='br', attrs={'class': 'clear'}) - ] - - remove_attributes = ['height', 'width'] - - extra_css = 'h1 {font-size: 1.6em; text-align: left} \ - h2 {font-size: 1em; text-align: left} \ - .copyright {font-size: 0.6em} \ - .caption {font-size: 0.6em}' - - def get_cover_url(self): - soup = self.index_to_soup(self.cover_source) - img_span = soup.find('span', {'class': re.compile('coverimage')}) - self.cover_url = img_span.find('img', src=True)['src'] - return self.cover_url - - def preprocess_html(self, soup): - # remove all articles without relevant content - tags = soup.findAll('li', {'class': 'tag-chain-item'}) - for li in tags: - if 'BILDERSTRECKE' in self.tag_to_string(li).upper(): - self.abort_article() - # remove list of tags - tags = soup.find('ul', {'class': 'tag-chain'}) - if tags: - tags.extract() - # remove all style attributes - for item in soup.findAll(style=True): - del item['style'] - # remove all local hyperlinks - for a in soup.findAll('a', {'href': True}): - if a['href'] and 'http' not in a['href']: - del a['href'] - # remove picture(s) of author(s) - for div in soup.findAll('div', {'class': 'ce_text block'}): - if div.find('hr'): - for hr in div.findAll('hr'): - hr.extract() - for img in div.findAll('img'): - img.extract() - return soup diff --git a/recipes/caravan_magazine_hindi.recipe b/recipes/caravan_magazine_hindi.recipe deleted file mode 100644 index e6a3326811..0000000000 --- a/recipes/caravan_magazine_hindi.recipe +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2015, Kovid Goyal - -import json - -from calibre.web.feeds.recipes import BasicNewsRecipe -from mechanize import Request - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict( - attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)} - ) - - -class CaravanMagazineHindi(BasicNewsRecipe): - - title = 'Caravan Magazine in Hindi' - __author__ = 'Kovid Goyal, Gobelinus, Aareet Mahadevan' - description = 'An Indian Journal of politics and culture' - language = 'hi' - timefmt = ' [%b, %Y]' - encoding = 'utf-8' - needs_subscription = 'optional' - - no_stylesheets = True - - keep_only_tags = [ - classes('post-title short-desc author-details cover'), - dict(itemprop='articleBody'), - ] - - remove_tags = [ - dict(name='meta'), - dict(attrs={'class': ['share-with', 'img-wrap abs']}), - ] - remove_attributes = ['style'] - - def get_browser(self, *args, **kw): - br = BasicNewsRecipe.get_browser(self, *args, **kw) - if not self.username or not self.password: - return br - data = json.dumps({ - 'email': self.username, - 'name': '', - 'password': self.password - }) - if not isinstance(data, bytes): - data = data.encode('utf-8') - rq = Request( - url='https://caravanmagazine.in/api/users/login', - data=data, - headers={ - 'Accept': 'application/json, text/plain, */*', - 'Origin': 'https://caravanmagazine.in', - 'Referer': 'https://caravanmagazine.in/', - 'Content-type': 'application/json;charset=UTF-8', - }, - method='POST' - ) - res = br.open(rq).read() - res = res.decode('utf-8') - self.log('Login request response: {}'.format(res)) - res = json.loads(res) - if res['code'] != 200 or res['message'] != "Login success": - raise ValueError('Login failed, check your username and password') - return br - - # To parse article toc - def parse_index(self): - base_url = 'https://www.caravanmagazine.in/' - soup = self.index_to_soup('{0}magazine'.format(base_url)) - - # find current issue cover - feeds = [] - sections = soup.find( - attrs={ - 'class': lambda x: x and 'current-magazine-issue' in x.split() - } - ).find(attrs={'class': lambda x: x and 'sections' in x.split()}) - for section in sections.findAll( - attrs={'class': lambda x: x and 'section' in x.split()} - ): - a = section.find('a') - section_title = self.tag_to_string(a) - self.log('\nSection:', section_title) - articles = [] - for article in section.findAll('article'): - details = article.find( - attrs={'class': lambda x: x and 'details' in x.split()} - ) - pre = details.find( - attrs={'class': lambda x: x and 'pre-heading' in x.split()} - ) - if pre is not None: - pre.extract() - a = details.find('a') - url = base_url + a['href'].lstrip('/') + '-hindi' - title = self.tag_to_string(a) - desc = self.tag_to_string(details.find('div')) - self.log('\t', title, url) - articles.append({'title': title, 'description': desc, 'url': url}) - if articles: - feeds.append((section_title, articles)) - - return feeds - - def preprocess_html(self, soup): - for div in soup.findAll(itemprop='image'): - for img in div.findAll('img'): - img['src'] = div['content'] - for img in soup.findAll(attrs={'data-src': True}): - img['src'] = img['data-src'] - return soup diff --git a/recipes/carta.recipe b/recipes/carta.recipe deleted file mode 100644 index f8c2349b47..0000000000 --- a/recipes/carta.recipe +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - - -class Carta(BasicNewsRecipe): - # Update 2017-09-01 - # Armin Geller - - title = u'Carta' - description = 'Authors blog for politics, economics and digital community' - __author__ = 'Armin Geller' # AGe Update 2017-09-01 - - timefmt = ' [%a %d %b %Y]' - oldest_article = 14 - max_articles_per_feed = 50 - no_stylesheets = True - remove_javascript = True - remove_empty_feeds = True - encoding = 'utf-8' - language = 'de' - - cover_url = 'http://www.carta.info/wp-content/themes/carta2014/img/carta-logo.svg' # AGe Update 2014-10-05 new cover - masthead_url = 'http://upload.wikimedia.org/wikipedia/de/b/ba/Carta_logo.png' - # masthead_url ='http://www.carta.info/wp-content/themes/carta2014/img/carta-logo.svg' - extra_css = ''' - h2 {font-size: 1.3em; font-style: italic} - .excerpt {font-size: 1.2em; font-style: italic} - ''' - - keep_only_tags = [ - dict(name='div', attrs={'class': ['article-text', 'author']}), - dict(name='p', attrs={'class': 'tags'}), - ] - - remove_tags = [ - dict(name='ul', attrs={'class': 'meta'}), - ] - - feeds = [ - (u'CARTA - Standard', u'http://feeds2.feedburner.com/carta-standard-rss'), - (u'CARTA - Homepage', u'http://feeds2.feedburner.com/carta-homepage-rss'), - (u'CARTA - Agenda', u'http://feeds2.feedburner.com/carta-agenda-rss'), - (u'CARTA - Ökonomie', u'http://feeds2.feedburner.com/carta-oekonomie-rss'), - (u'CARTA - Medien', u'http://feeds2.feedburner.com/carta-medien-rss'), - ] diff --git a/recipes/catholic_daily_readings.recipe b/recipes/catholic_daily_readings.recipe deleted file mode 100644 index 36ef704813..0000000000 --- a/recipes/catholic_daily_readings.recipe +++ /dev/null @@ -1,17 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1328971305(BasicNewsRecipe): - title = u'Catholic Daily Readings' - language = 'en' - __author__ = 'adoucette' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [ - (u'Daily Readings - USCCB', u'http://www.usccb.org/bible/readings/rss/'), - (u'Daily Reflection - One Bread One Body', u'http://www.presentationministries.com/general/rss.asp'), - - (u'Mass Readings - Universalis', u'http://www.universalis.com/atommass3.xml'), - (u'Saint Of The Day - CNA', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday')] diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe deleted file mode 100644 index 4d132a8830..0000000000 --- a/recipes/cd_action.recipe +++ /dev/null @@ -1,28 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class CD_Action(BasicNewsRecipe): - title = u'CD-Action' - __author__ = 'fenuks' - description = 'Strona CD-Action (CDA), największego w Polsce pisma dla graczy.Pełne wersje gier, newsy, recenzje, zapowiedzi, konkursy, forum, opinie, galerie screenów,trailery, filmiki, patche, teksty. Gry komputerowe (PC) oraz na konsole (PS3, XBOX 360).' # noqa - category = 'games' - language = 'pl' - index = 'http://www.cdaction.pl' - oldest_article = 8 - max_articles_per_feed = 100 - no_stylesheets = True - keep_only_tags = dict(id='news_content') - remove_tags_after = dict(name='div', attrs={'class': 'tresc'}) - feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')] - - def get_cover_url(self): - soup = self.index_to_soup('http://www.cdaction.pl/magazyn/') - self.cover_url = 'http://www.cdaction.pl' + \ - soup.find(id='wspolnik').div.a['href'] - return getattr(self, 'cover_url', self.cover_url) - - def preprocess_html(self, soup): - for a in soup.findAll('a', href=True): - if 'http://' not in a['href'] and 'https://' not in a['href']: - a['href'] = self.index + a['href'] - return soup diff --git a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe deleted file mode 100644 index b7cf11496e..0000000000 --- a/recipes/cdrinfo_pl.recipe +++ /dev/null @@ -1,74 +0,0 @@ -__license__ = 'GPL v3' -import re - -from calibre.ebooks.BeautifulSoup import Comment -from calibre.web.feeds.news import BasicNewsRecipe - - -class cdrinfo(BasicNewsRecipe): - title = u'CDRinfo.pl' - __author__ = 'fenuks' - description = u'Serwis poświęcony archiwizacji danych. Testy i recenzje nagrywarek. Programy do nagrywania płyt. Dyski twarde, dyski SSD i serwery sieciowe NAS. Rankingi dyskow twardych, najszybsze dyski twarde, newsy, artykuły, testy, recenzje, porady, oprogramowanie. Zestawienie nagrywarek, najnowsze biosy do nagrywarek, programy dla dysków twardych.' # noqa - category = 'it, hardware' - # publication_type = '' - language = 'pl' - # encoding = '' - # extra_css = '' - cover_url = 'http://www.cdrinfo.pl/gfx/graph3/top.jpg' - # masthead_url = '' - use_embedded_content = False - oldest_article = 777 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - remove_javascript = True - remove_attributes = ['style', 'onmouseover'] - preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\\.gravatar\\.com\\.

', re.DOTALL), lambda match: ''), - (re.compile(u']*?>.{,2}

', re.DOTALL), lambda match: '')] - ignore_duplicate_articles = {'title', 'url'} - - keep_only_tags = [ - dict(name='input', attrs={'name': 'ref'}), dict(id=['text', 'text2'])] - remove_tags = [dict(attrs={'class': ['navigation', 'sociable', 'last6news']}), dict( - name=['hr', 'br']), dict(id='respond')] - remove_tags_after = dict(id='artnawigacja') - feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), - (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'), - (u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'), - (u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml') - ] - - def preprocess_html(self, soup): - if soup.find(id='artnawigacja'): - self.append_page(soup, soup.body) - return soup - - def append_page(self, soup, appendtag): - baseurl = 'http://cdrinfo.pl' + \ - soup.find(name='input', attrs={'name': 'ref'})['value'] + '/' - if baseurl[-2] == '/': - baseurl = baseurl[:-1] - tag = soup.find(id='artnawigacja') - div = tag.find('div', attrs={'align': 'right'}) - while div: - counter = 0 - while counter < 5: - try: - soup2 = self.index_to_soup(baseurl + div.a['href']) - break - except: - counter += 1 - tag2 = soup2.find(id='artnawigacja') - div = tag2.find('div', attrs={'align': 'right'}) - pagetext = soup2.find(attrs={'class': 'art'}) - comments = pagetext.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - for r in soup2.findAll(attrs={'class': 'star-rating'}): - r.extract() - for r in soup2.findAll(attrs={'class': 'star-rating2'}): - r.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - tag.extract() diff --git a/recipes/ceska_pozice.recipe b/recipes/ceska_pozice.recipe deleted file mode 100644 index e55c1e50e5..0000000000 --- a/recipes/ceska_pozice.recipe +++ /dev/null @@ -1,70 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import unicode_literals - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class ceskaPoziceRecipe(BasicNewsRecipe): - __author__ = 'bubak' - title = u'Česká pozice' - description = 'Česká pozice' - oldest_article = 2 - max_articles_per_feed = 20 - - feeds = [ - (u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'), - (u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'), - (u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'), - (u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed') - ] - - language = 'cs' - cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png' - remove_javascript = True - no_stylesheets = True - domain = u'http://www.ceskapozice.cz' - use_embedded_content = False - - remove_tags = [dict(name='div', attrs={'class': ['block-ad', 'region region-content-ad']}), - dict(name='ul', attrs={'class': 'links'}), - dict(name='div', attrs={ - 'id': ['comments', 'back-to-top']}), - dict(name='div', attrs={ - 'class': ['next-page', 'region region-content-ad']}), - dict(name='cite')] - - keep_only_tags = [dict(name='div', attrs={'id': 'content'})] - - visited_urls = {} - - def get_article_url(self, article): - url = BasicNewsRecipe.get_article_url(self, article) - if url in self.visited_urls: - self.log.debug('Ignoring duplicate: ' + url) - return None - else: - self.visited_urls[url] = True - self.log.debug('Accepting: ' + url) - return url - - def preprocess_html(self, soup): - self.append_page(soup, soup.body, 3) - return soup - - def append_page(self, soup, appendtag, position): - pager = soup.find('div', attrs={'class': 'paging-bottom'}) - if pager: - nextbutton = pager.find('li', attrs={'class': 'pager-next'}) - if nextbutton: - nexturl = self.domain + nextbutton.a['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'class': 'main-body'}) - for it in texttag.findAll('div', attrs={'class': 'region region-content-ad'}): - it.extract() - for it in texttag.findAll('cite'): - it.extract() - newpos = len(texttag.contents) - self.append_page(soup2, texttag, newpos) - texttag.extract() - appendtag.insert(position, texttag) - pager.extract() diff --git a/recipes/cesky_rozhlas_6.recipe b/recipes/cesky_rozhlas_6.recipe deleted file mode 100644 index e9975d801c..0000000000 --- a/recipes/cesky_rozhlas_6.recipe +++ /dev/null @@ -1,27 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import unicode_literals - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class cro6Recipe(BasicNewsRecipe): - __author__ = 'bubak' - title = u'Český rozhlas 6' - description = 'Český rozhlas 6' - oldest_article = 1 - max_articles_per_feed = 20 - - feeds = [ - (u'Český rozhlas 6', u'http://www.rozhlas.cz/export/cro6/') - ] - - language = 'cs' - cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png' - remove_javascript = True - no_stylesheets = True - - remove_attributes = [] - remove_tags = [dict(name='div', attrs={'class': ['audio-play-all', 'poradHeaders', 'actions']}), - dict(name='p', attrs={'class': ['para-last']})] - - keep_only_tags = [dict(name='div', attrs={'id': 'article'})] diff --git a/recipes/cetnixploitation.recipe b/recipes/cetnixploitation.recipe deleted file mode 100644 index c6f1dd2706..0000000000 --- a/recipes/cetnixploitation.recipe +++ /dev/null @@ -1,34 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -chetnixploitation.blogspot.com -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Chetnixploitation(BasicNewsRecipe): - title = 'Chetnixploitation' - __author__ = 'Darko Miletic' - description = 'Filmski blog' - oldest_article = 7 - max_articles_per_feed = 100 - language = 'sr' - publication_type = 'blog' - encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = True - extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } ' # noqa - - conversion_options = { - 'comment': description, 'tags': 'film, blog, cetnici, srbija, ex-yu', 'publisher': 'Son of Man', 'language': language - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/cgm_pl.recipe b/recipes/cgm_pl.recipe deleted file mode 100644 index ce529b661b..0000000000 --- a/recipes/cgm_pl.recipe +++ /dev/null @@ -1,48 +0,0 @@ -from calibre.ebooks.BeautifulSoup import BeautifulSoup -from calibre.web.feeds.news import BasicNewsRecipe - - -class CGM(BasicNewsRecipe): - title = u'CGM' - oldest_article = 7 - __author__ = 'fenuks' - description = u'Codzienna Gazeta Muzyczna' - masthead_url = 'http://www.cgm.pl/img/header/logo.gif' - cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg' - category = 'music' - language = 'pl' - use_embedded_content = False - remove_empty_feeds = True - max_articles_per_feed = 100 - no_stylesheets = True - extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;} img {display: block;} ul.galleryImagesList {list-style: none;} li.item {float: left;} .calibrenavbar {clear: both;}' # noqa - remove_tags_before = dict(id='mainContent') - remove_tags_after = dict(name='div', attrs={'class': 'fbContainer'}) - remove_tags = [dict(name='div', attrs={'class': ['fbContainer', 'socials']}), - dict(name='p', attrs={ - 'class': ['tagCloud', 'galleryAuthor']}), - dict(id=['movieShare', 'container']), dict(name='br')] - feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'), - (u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')] - - def preprocess_html(self, soup): - gallery = soup.find('div', attrs={'class': 'galleryFlash'}) - if gallery and gallery.div: - img = gallery.div - gallery.img.extract() - if img: - img = img['style'] - img = 'http://www.cgm.pl' + \ - img[img.find('url(') + 4:img.find(')')] - gallery.contents[1].name = 'img' - gallery.contents[1]['src'] = img - pos = len(gallery.contents) - gallery.insert(pos, BeautifulSoup('
')) - - for item in soup.findAll(style=True): - del item['style'] - ad = soup.findAll('a') - for r in ad: - if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']: - r.extract() - return soup diff --git a/recipes/chicago_breaking_news.recipe b/recipes/chicago_breaking_news.recipe deleted file mode 100644 index 172e574671..0000000000 --- a/recipes/chicago_breaking_news.recipe +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -chicagobreakingnews.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ChicagoBreakingNews(BasicNewsRecipe): - title = 'Chicago Breaking News' - __author__ = 'Darko Miletic' - description = 'Breaking News from Chicago' - oldest_article = 1 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = True - publisher = 'Chicago Breaking News' - category = 'news, politics, USA, Chicago' - encoding = 'utf8' - language = 'en' - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + \ - '"\ncomments="' + description + '"\ntags="' + category + '"' - - feeds = [(u'Breaking news', u'http://feeds2.feedburner.com/ChicagoBreakingNews/')] - - def preprocess_html(self, soup): - for item in soup.findAll('a'): - if item['href'].find('http://feedads.googleadservices.com') > -1: - item.extract() - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll(color=True): - del item['color'] - for item in soup.findAll(size=True): - del item['size'] - return soup diff --git a/recipes/china_economic_net.recipe b/recipes/china_economic_net.recipe deleted file mode 100644 index c63ee43b70..0000000000 --- a/recipes/china_economic_net.recipe +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1278162597(BasicNewsRecipe): - __author__ = 'rty' - title = u'China Economic Net' - oldest_article = 7 - max_articles_per_feed = 100 - - publisher = 'www.ce.cn - China Economic net - Beijing' - description = 'China Economic Net Magazine' - category = 'Economic News Magazine, Chinese, China' - - recipe_specific_options = { - 'days': { - 'short': 'Oldest article to download from this news source. In days ', - 'long': 'For example, 0.5, gives you articles from the past 12 hours', - 'default': str(oldest_article) - } - } - - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - d = self.recipe_specific_options.get('days') - if d and isinstance(d, str): - self.oldest_article = float(d) - - feeds = [ - (u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'), - (u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'), - (u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'), - (u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'), - (u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml') - ] - masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif' - extra_css = ''' - @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n - body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n - h1 {font-family: 'DroidFont', serif;}\n - .articledescription {font-family: 'DroidFont', serif;} - ''' - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - language = 'zh_CN' - encoding = 'gb2312' - conversion_options = {'linearize_tables': True} - - keep_only_tags = [ - - dict(name='h1', attrs={'id': 'articleTitle'}), - dict(name='div', attrs={'class': 'laiyuan'}), - dict(name='div', attrs={'id': 'articleText'}), - ] diff --git a/recipes/china_times.recipe b/recipes/china_times.recipe deleted file mode 100644 index b9b77e5e27..0000000000 --- a/recipes/china_times.recipe +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -# dug from http://www.mobileread.com/forums/showthread.php?p=1012294 - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1277443634(BasicNewsRecipe): - title = u'中時電子報' - oldest_article = 1 - max_articles_per_feed = 100 - - feeds = [(u'焦點要聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-focus'), - (u'生活新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-life'), - (u'社會新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-society'), - (u'兩岸國際', u'http://feeds.feedburner.com/chinatimes/chinatimes-international'), - (u'時論廣場', u'http://feeds.feedburner.com/chinatimes/chinatimes-comment'), - (u'藝文副刊', u'http://feeds.feedburner.com/chinatimes/chinatimes-philology'), - (u'地方新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-local'), - (u'財經焦點', u'http://feeds.feedburner.com/chinatimes/chinatimes-finance'), - (u'運動天地', u'http://feeds.feedburner.com/chinatimes/chinatimes-sport'), - (u'娛樂新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-showbiz'), - (u'時尚消費', u'http://feeds.feedburner.com/chinatimes/chinatimes-fashion'), - # (u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links - # (u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links - ] - - __author__ = 'einstuerzende, updated by Eddie Lau' - __version__ = '1.1' - language = 'zh' - publisher = 'China Times Group' - description = 'China Times (Taiwan)' - category = 'News, Chinese, Taiwan' - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - auto_cleanup = True - encoding = 'utf-8' - conversion_options = {'linearize_tables': True} - masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif' - cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif' diff --git a/recipes/chipro.recipe b/recipes/chipro.recipe deleted file mode 100644 index 799880e888..0000000000 --- a/recipes/chipro.recipe +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -chip.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ChipRo(BasicNewsRecipe): - title = u'Chip Online' - __author__ = u'Silviu Cotoar\u0103' - description = 'Chip Online' - publisher = 'Chip Online' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,IT' - encoding = 'utf-8' - cover_url = 'http://www.chip.ro/images/logo.png' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='h2', attrs={'class': 'contentheading clearfix'}), dict(name='span', attrs={ - 'class': 'createby'}), dict(name='div', attrs={'class': 'article-content'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['sharemecompactbutton']}), dict(name='div', attrs={'align': ['left']}), dict(name='div', attrs={ - 'align': ['center']}), dict(name='th', attrs={'class': ['pagenav_prev']}), dict(name='table', attrs={'class': ['pagenav']}) - ] - - feeds = [ - (u'Feeds', u'http://www.chip.ro/index.php?option=com_ninjarsssyndicator&feed_id=9&format=raw') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/chosun.recipe b/recipes/chosun.recipe deleted file mode 100644 index 6c1ea48fb6..0000000000 --- a/recipes/chosun.recipe +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -__copyright__ = '2015, Hoje Lee ' -''' -Profile to download Chosun.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ChosunDotcom(BasicNewsRecipe): - language = 'ko' - title = u'조선일보' - description = u'조선닷컴 기사' - __author__ = 'Hoje Lee' - oldest_article = 7 - max_articles_per_feed = 10 - auto_cleanup = True - - feeds = [ - (u'정치', 'http://www.chosun.com/site/data/rss/politics.xml'), - (u'조선비즈', 'http://biz.chosun.com/site/data/rss/rss.xml'), - (u'사회', 'http://www.chosun.com/site/data/rss/national.xml'), - (u'문화', 'http://www.chosun.com/site/data/rss/culture.xml'), - (u'국제', 'http://www.chosun.com/site/data/rss/international.xml'), - (u'오피니언', 'http://www.chosun.com/site/data/rss/editorials.xml'), - (u'스포츠', 'http://www.chosun.com/site/data/rss/sports.xml'), - (u'연예', 'http://www.chosun.com/site/data/rss/ent.xml'), - ] diff --git a/recipes/cinco_dias.recipe b/recipes/cinco_dias.recipe deleted file mode 100644 index 5201403fd4..0000000000 --- a/recipes/cinco_dias.recipe +++ /dev/null @@ -1,71 +0,0 @@ -__license__ = 'GPL v3' -__author__ = 'Luis Hernandez' -__copyright__ = 'Luis Hernandez' -__version__ = 'v1.2' -__date__ = '31 January 2011' - -''' -http://www.cincodias.com/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1294946868(BasicNewsRecipe): - - title = u'Cinco Dias' - publisher = u'Grupo Prisa' - - __author__ = 'Luis Hernandez' - description = 'spanish web about money and business, free edition' - - cover_url = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif' - oldest_article = 2 - max_articles_per_feed = 100 - - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - - language = 'es' - remove_empty_feeds = True - encoding = 'ISO-8859-1' - timefmt = '[%a, %d %b, %Y]' - - keep_only_tags = [ - dict(name='div', attrs={'class': ['cab_articulo cab_noticia', 'pos_3', 'txt_noticia', 'mod_despiece']}), dict( - name='p', attrs={'class': ['cintillo']}) - ] - - remove_tags_before = dict(name='div', attrs={'class': ['publi_h']}) - remove_tags_after = dict( - name='div', attrs={'class': ['tab_util util_estadisticas']}) - - remove_tags = [ - dict(name='div', attrs={'class': ['util-1', 'util-2', 'util-3', 'inner estirar', 'inner1', 'inner2', 'inner3', 'cont', 'tab_util util_estadisticas', 'tab_util util_enviar', 'mod_list_inf', 'mod_similares', 'mod_divisas', 'mod_sectores', 'mod_termometro', 'mod post', 'mod_img', 'mod_txt', 'nivel estirar', 'barra estirar', 'info_brujula btnBrujula', 'utilidad_brujula estirar']}), dict(name='li', attrs={'class': ['lnk-fcbook', 'lnk-retweet', 'lnk-meneame', 'desplegable', 'comentarios', 'list-options', 'estirar']}), dict(name='ul', attrs={'class': ['lista-izquierda', 'list-options', 'estirar']}), dict(name='p', attrs={'class': ['autor']}) # noqa - ] - - extra_css = """ - p{text-align: justify; font-size: 100%} - body{ text-align: left; font-size:100% } - h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } - h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } - """ - - feeds = [ - - (u'Ultima Hora', u'http://www.cincodias.com/rss/feed.html?feedId=17029'), - (u'Empresas', u'http://www.cincodias.com/rss/feed.html?feedId=19'), - (u'Mercados', u'http://www.cincodias.com/rss/feed.html?feedId=20'), - (u'Economia', u'http://www.cincodias.com/rss/feed.html?feedId=21'), - (u'Tecnorama', u'http://www.cincodias.com/rss/feed.html?feedId=17230'), - (u'Tecnologia', u'http://www.cincodias.com/rss/feed.html?feedId=17106'), - (u'Finanzas Personales', u'http://www.cincodias.com/rss/feed.html?feedId=22'), - (u'Fiscalidad', u'http://www.cincodias.com/rss/feed.html?feedId=17107'), - (u'Vivienda', u'http://www.cincodias.com/rss/feed.html?feedId=17108'), - (u'Tendencias', u'http://www.cincodias.com/rss/feed.html?feedId=17109'), - (u'Empleo', u'http://www.cincodias.com/rss/feed.html?feedId=17110'), - (u'IBEX 35', u'http://www.cincodias.com/rss/feed.html?feedId=17125'), - (u'Sectores', u'http://www.cincodias.com/rss/feed.html?feedId=17126'), - (u'Opinion', u'http://www.cincodias.com/rss/feed.html?feedId=17105') - ] diff --git a/recipes/cinebel_be.recipe b/recipes/cinebel_be.recipe deleted file mode 100644 index cf5fca41dd..0000000000 --- a/recipes/cinebel_be.recipe +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008-2011, Lionel Bergeret ' -''' -cinebel.be -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Cinebel(BasicNewsRecipe): - title = u'Cinebel' - __author__ = u'Lionel Bergeret' - description = u'Cinema news from Belgium in French' - publisher = u'cinebel.be' - category = 'news, cinema, movie, Belgium' - oldest_article = 15 - language = 'fr' - - max_articles_per_feed = 20 - no_stylesheets = True - use_embedded_content = False - timefmt = ' [%d %b %Y]' - filterDuplicates = True - - keep_only_tags = [ - dict(name='span', attrs={'class': 'movieMainTitle'}), dict(name='div', attrs={'id': 'filmPoster'}), dict( - name='div', attrs={'id': 'filmDefinition'}), dict(name='div', attrs={'id': 'synopsis'}) - ] - - feeds = [ - - (u'Les sorties de la semaine', u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=0'), - (u'Top 10', u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2') - ] - - def preprocess_html(self, soup): - for alink in soup.findAll('a', href=True): - tstr = "Site officiel: " + alink['href'] - alink.replaceWith(tstr) - return soup - - def get_cover_url(self): - cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif' - return cover_url diff --git a/recipes/cio.recipe b/recipes/cio.recipe deleted file mode 100644 index 99dc6c1d7a..0000000000 --- a/recipes/cio.recipe +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini' -__copyright__ = '2009, Lorenzo Vigentini ' -__version__ = 'v1.01' -__date__ = '14, January 2010' -__description__ = 'CIO is the leading information brand for today s busy chief information officer. ' - -''' -http://www.cio.co.uk/ -''' - -from calibre.ptempfile import PersistentTemporaryFile -from calibre.web.feeds.news import BasicNewsRecipe - - -class cio(BasicNewsRecipe): - __author__ = 'Lorenzo Vigentini' - description = 'CIO is the leading information brand for today\'s busy chief information officer.' - cover_url = 'http://media.cio.co.uk/graphics/shared/cio-logo.gif' - - title = 'CIO ' - publisher = 'IDG Communication' - category = 'IT, technology, business, industry' - - language = 'en' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 7 - max_articles_per_feed = 10 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - - temp_files = [] - articles_are_obfuscated = True - - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url) - response = br.follow_link(url_regex='&print&intcmp=ROSATT2$', nr=0) - html = response.read() - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name - - keep_only_tags = [ - dict(name='div', attrs={'id': 'mainContent'}) - ] - - feeds = [ - (u'News', u'http://www.cio.co.uk/rss/feeds/cio-news.xml'), - (u'Debate', u'http://www.cio.co.uk/rss/feeds/cio-debate.xml'), - (u'Analysis', u'http://www.cio.co.uk/rss/feeds/cio-analysis.xml'), - (u'Opinion', u'http://www.cio.co.uk/rss/feeds/cio-opinion.xml'), - (u'In-Depth', u'http://www.cio.co.uk/rss/feeds/cio-in-depth.xml'), - (u'Change management', - u'http://www.cio.co.uk/rss/feeds/cio-change-management-management.xml'), - (u'Regulatory compliance', - u'http://www.cio.co.uk/rss/feeds/cio-regulatory-compliance-management.xml'), - (u'Business strategy', - u'http://www.cio.co.uk/rss/feeds/cio-business-strategy-management.xml'), - (u'Technology', u'http://www.cio.co.uk/rss/feeds/cio-technology-management.xml'), - (u'Security', u'http://www.cio.co.uk/rss/feeds/cio-security-management.xml'), - (u'Soft skills', u'http://www.cio.co.uk/rss/feeds/cio-soft-skills-management.xml'), - (u'The CIO career', - u'http://www.cio.co.uk/rss/feeds/cio-cio-career-management.xml'), - (u'Budgets', u'http://www.cio.co.uk/rss/feeds/cio-budgets-management.xml'), - (u'Supplier management', - u'http://www.cio.co.uk/rss/feeds/cio-supplier-management-management.xml'), - (u'Board politics', - u'http://www.cio.co.uk/rss/feeds/cio-board-politics-management.xml'), - (u'Enterprise software', - u'http://www.cio.co.uk/rss/feeds/cio-enterprise-software-technology.xml'), - (u'Mobile and wireless', - u'http://www.cio.co.uk/rss/feeds/cio-mobile-wireless-technology.xml'), - (u'Security', u'http://www.cio.co.uk/rss/feeds/cio-security-technology.xml'), - (u'Storage', u'http://www.cio.co.uk/rss/feeds/cio-storage-technology.xml'), - (u'Desktop and client', - u'http://www.cio.co.uk/rss/feeds/cio-desktop-client-technology.xml'), - (u'Outsourcing', u'http://www.cio.co.uk/rss/feeds/cio-outsourcing-technology.xml'), - (u'Internet and e-commerce', - u'http://www.cio.co.uk/rss/feeds/cio-internet-technology.xml'), - (u'Database management', - u'http://www.cio.co.uk/rss/feeds/cio-database-management-technology.xml'), - (u'Communications and networking ', - u'http://www.cio.co.uk/rss/feeds/cio-communication-networking-technology.xml'), - (u'Grid computing', - u'http://www.cio.co.uk/rss/feeds/cio-grid-computing-cloud-technology.xml'), - (u'Enterprise search', - u'http://www.cio.co.uk/rss/feeds/cio-enterprise-search-technology.xml'), - (u'CRM ', u'http://www.cio.co.uk/rss/feeds/cio-crm-technology.xml'), - (u'Ade McCormack ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-ade-mccormack.xml'), - (u'Andy Hayler ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-andy-hayler.xml'), - (u'CEB ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-ceb.xml'), - (u'CIO Staff ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-cio-staff.xml'), - (u'Dave Pepperell ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-dave-pepperell.xml'), - (u'Elliot Limb ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-elliot-limb.xml'), - (u'Freeform Dynamics ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-freeform-dynamics.xml'), - (u'Giles Nelson ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-giles-nelson.xml'), - (u'Mark Chillingworth ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-mark-chillingworth.xml'), - (u'Martin Veitch ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-martin-veitch.xml'), - (u'Mike Altendorf ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-mike-altendorf.xml'), - (u'Richard Steel ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-richard-steel.xml'), - (u'Richard Sykes ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-richard-sykes.xml'), - (u'Rob Llewellyn ', - u'http://www.cio.co.uk/rss/feeds/cio-opinion-rob-llewellyn.xml'), - (u'Free thinking ', - u'http://www.cio.co.uk/rss/feeds/cio-blog-free-thinking.xml'), - (u'Leading CIOs ', - u'http://www.cio.co.uk/rss/feeds/cio-blog-leading-cios.xml'), - (u'CIO News View ', - u'http://www.cio.co.uk/rss/feeds/cio-blog-cio-news-view.xml'), - (u'CIO Blog ', u'http://www.cio.co.uk/rss/feeds/cio-blog-cio-blog.xml'), - (u'Transformation CIO ', - u'http://www.cio.co.uk/rss/feeds/cio-blog-transformation-cio.xml') - ] diff --git a/recipes/cio_magazine.recipe b/recipes/cio_magazine.recipe deleted file mode 100644 index 813e3445cb..0000000000 --- a/recipes/cio_magazine.recipe +++ /dev/null @@ -1,147 +0,0 @@ -from __future__ import print_function - -import re - -# Para convertir el tiempo del articulo -import string - -# sys no hace falta... lo intente usar para escribir en stderr -from calibre import strftime - -# Los primeros comentarios son las dificultades que he tenido con el Piton -# Cuando da error UTF8 revisa los comentarios (acentos). En notepad++ Search, Goto, posicion y lo ves. -# Editar con Notepad++ Si pone - donde no debe es que ha indentado mal... Edit - Blank operations - tab to space -# He entendido lo que significa el from... son paths dentro de pylib.zip... -# Con from importa solo un simbolo...con import,la libreria completa -from calibre.web.feeds.news import BasicNewsRecipe - -# Para usar expresiones regulares -# Visto en pylib.zip... la primera letra es mayuscula -# Estas dos ultimas han sido un vago intento de establecer una cookie (no -# usado) - - -class CIO_Magazine(BasicNewsRecipe): - title = 'CIO Magazine' - oldest_article = 14 - max_articles_per_feed = 100 - auto_cleanup = True - __author__ = 'Julio Map' - description = 'CIO is the leading information brand for today-s busy Chief information Officer - CIO Magazine bi-monthly ' - language = 'en' - encoding = 'utf8' - cover_url = 'http://www.cio.com/homepage/images/hp-cio-logo-linkedin.png' - - remove_tags_before = dict(name='div', attrs={'id': 'container'}) -# Absolutamente innecesario... al final he visto un print_version (ver mas -# adelante) - -# Dentro de una revista dada... -# issue_details contiene el titulo y las secciones de este ejemplar -# DetailModule esta dentro de issue_details contiene las urls y resumenes -# Dentro de un articulo dado... -# Article-default-body contiene el texto. Pero como digo, he encontrado -# una print_version - - no_stylesheets = True - remove_javascript = True - - def print_version(self, url): - # A esta funcion le llama el sistema... no hay que llamarla uno mismo (porque seria llamada dos veces) - # Existe una version imprimible de los articulos cambiando - # http://www.cio.com/article// por - # http://www.cio.com/article/print/ que contiene todas las paginas - # dentro del div id=container - if url.startswith('/'): - url = 'http://www.cio.com' + url - segments = url.split('/') - printURL = '/'.join(segments[0:4]) + '/print/' + segments[4] + '#' - return printURL - - def parse_index(self): - ####################################################################### - # This method should be implemented in recipes that parse a website - # instead of feeds to generate a list of articles. Typical uses are for - # news sources that have a Print Edition webpage that lists all the - # articles in the current print edition. If this function is implemented, - # it will be used in preference to BasicNewsRecipe.parse_feeds(). - # - # It must return a list. Each element of the list must be a 2-element - # tuple of the form ('feed title', list of articles). - # - # Each list of articles must contain dictionaries of the form: - # - # { - # 'title' : article title, - # 'url' : URL of print version, - # 'date' : The publication date of the article as a string, - # 'description' : A summary of the article - # 'content' : The full article (can be an empty string). This is used by FullContentProfile - # } - # - # For an example, see the recipe for downloading The Atlantic. - # In addition, you can add 'author' for the author of the article. - ####################################################################### - - # Primero buscamos cual es la ultima revista que se ha creado - soupinicial = self.index_to_soup('http://www.cio.com/magazine') - # Es el primer enlace que hay en el DIV con class content_body - a = soupinicial.find( - True, attrs={'class': 'content_body'}).find('a', href=True) - INDEX = re.sub(r'\?.*', '', a['href']) - # Como cio.com usa enlaces relativos, le anteponemos el domain name. - if INDEX.startswith('/'): # protegiendonos de que dejen de usarlos - INDEX = 'http://www.cio.com' + INDEX - # Y nos aseguramos en los logs que lo estamos haciendo bien - print("INDEX en parse_index: ", INDEX) - - # Ya sabemos cual es la revista... procesemosla. - soup = self.index_to_soup(INDEX) - - articles = {} - key = None - feeds = [] - # Para empezar nos quedamos solo con dos DIV, 'heading' y ' issue_item' - # Del primero sacamos las categorias (key) y del segundo las urls y - # resumenes - for div in soup.findAll(True, - attrs={'class': ['heading', 'issue_item']}): - - if ''.join(div['class']) == 'heading': - key = string.capwords(self.tag_to_string(div.span)) - print("Key: ", key) # Esto es para depurar - articles[key] = [] - feeds.append(key) - - elif ''.join(div['class']) == 'issue_item': - a = div.find('a', href=True) - if not a: - continue - url = re.sub(r'\?.*', '', a['href']) - print("url: ", url) # Esto es para depurar - # Ya para nota, quitar al final las dos ultimas palabras - title = self.tag_to_string(a, use_alt=True).strip() - # No es la fecha de publicacion sino la de colecta - pubdate = strftime('%a, %d %b') - # Dentro de la div 'issue_item' el unico parrafo que hay es el - # resumen - summary = div.find('p') - # Si hay summary la description sera el summary... si no, la - # dejamos en blanco - description = '' - - if summary: - description = self.tag_to_string(summary, use_alt=False) - print("Description = ", description) - - # Esto esta copiado del NY times - feed = key if key is not None else 'Uncategorized' - if feed not in articles: - articles[feed] = [] - if 'podcasts' not in url: - articles[feed].append( - dict(title=title, url=url, date=pubdate, - description=description, - content='')) - feeds = [(k, articles[k]) for k in feeds if k in articles] - return feeds diff --git a/recipes/cityavisen_dk.recipe b/recipes/cityavisen_dk.recipe deleted file mode 100644 index c9f493ad86..0000000000 --- a/recipes/cityavisen_dk.recipe +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -City Avisen -''' - - -class CityAvisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'City Avisen' - - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('City Avisen', 'http://minby.dk/city-avisen/feed/'), - ('Kommentarer til City Avisen', 'http://minby.dk/city-avisen/comments/feed/'), - - ] - diff --git a/recipes/cjr.recipe b/recipes/cjr.recipe deleted file mode 100644 index b360e85223..0000000000 --- a/recipes/cjr.recipe +++ /dev/null @@ -1,16 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class CJR(BasicNewsRecipe): - title = u'Columbia Journalism Review' - __author__ = u'Xanthan Gum' - description = 'News about journalism.' - language = 'en' - - oldest_article = 7 - max_articles_per_feed = 100 - - feeds = [(u'News Stories', u'http://www.cjr.org/index.xml')] - - def print_version(self, url): - return url + '?page=all&print=true' diff --git a/recipes/clarion_ledger.recipe b/recipes/clarion_ledger.recipe deleted file mode 100644 index e52c093d78..0000000000 --- a/recipes/clarion_ledger.recipe +++ /dev/null @@ -1,28 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class ClarionLedger(BasicNewsRecipe): - title = u'Clarion Ledger' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - language = 'en' - __author__ = 'cr4zyd' - - feeds = [ - (u'Local News', u'http://www.clarionledger.com/apps/pbcs.dll/oversikt?Category=RSS01'), - (u'Breaking News', u'http://www.clarionledger.com/apps/pbcs.dll/section?Category=RSS'), - - (u'Sports', u'http://www.clarionledger.com/apps/pbcs.dll/oversikt?Category=RSS02'), - (u'Business', u'http://www.clarionledger.com/apps/pbcs.dll/oversikt?Category=RSS03')] - - keep_only_tags = [dict(name='div', attrs={'class': 'article-headline'}), - dict(name='div', attrs={'class': 'article-bodytext'})] - remove_tags = [dict(name=['img', 'script', 'li']), - dict(name='p', attrs={'class': 'ratingbyline'}), - dict(name='div', attrs={'class': 'article-tools'}), - dict(name='div', attrs={ - 'class': 'article-pagination article-pagination-top'}), - dict(name='div', attrs={ - 'class': 'article-pagination article-pagination-bottom'}), - dict(name='div', attrs={'class': 'articleflex-container'})] diff --git a/recipes/clic_rbs.recipe b/recipes/clic_rbs.recipe deleted file mode 100644 index d08e414ea3..0000000000 --- a/recipes/clic_rbs.recipe +++ /dev/null @@ -1,53 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class ClicRBS(BasicNewsRecipe): - title = u'ClicRBS' - language = 'pt' - __author__ = 'arvoredo' - oldest_article = 3 - max_articles_per_feed = 9 - cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif' - - remove_tags = [ - dict(name='div', attrs={ - 'class': ['clic-barra-inner', 'botao-versao-mobile ']}) - ] - - remove_tags_before = dict(name='div ', attrs={'class': 'descricao'}) - remove_tags_before = dict(name='div', attrs={'id': 'glb-corpo'}) - remove_tags_before = dict(name='div', attrs={'class': 'descricao'}) - remove_tags_before = dict(name='div', attrs={'class': 'coluna'}) - remove_tags_after = dict(name='div', attrs={'class': 'extra'}) - remove_tags_after = dict(name='div', attrs={'id': 'links-patrocinados'}) - remove_tags_after = dict(name='h4', attrs={'class': 'tipo-c comente'}) - remove_tags_after = dict(name='ul', attrs={'class': 'lista'}) - - feeds = [ - - (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13'), - (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67'), - (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml'), - (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1'), - (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13'), - (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13'), - (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1'), - (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1'), - (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1'), - (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2'), - (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1'), - (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13'), - (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2'), - (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18'), - (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2'), - (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2') - ] - - extra_css = ''' - cite{color:#007BB5; font-size:xx-small; font-style:italic;} - body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;} - h3{font-size:large; color:#082963; font-weight:bold;} - #ident{color:#0179B4; font-size:xx-small;} - p{color:#000000;font-weight:normal;} - .commentario p{color:#007BB5; font-style:italic;} - ''' diff --git a/recipes/climate_progress.recipe b/recipes/climate_progress.recipe deleted file mode 100644 index e7ecd93e67..0000000000 --- a/recipes/climate_progress.recipe +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -climateprogress.org -''' - -from calibre.ebooks.BeautifulSoup import Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class ClimateProgress(BasicNewsRecipe): - title = 'Climate Progress' - __author__ = 'Darko Miletic' - description = "An insider's view of climate science, politics and solutions" - publisher = 'Climate Progress' - category = 'news, ecology, climate, blog' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = True - encoding = 'utf-8' - language = 'en' - - lang = 'en' - direction = 'ltr' - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + \ - '"\ncomments="' + description + '"\ntags="' + category + '"' - - extra_css = ''' - h2{color:#003366;font-size: large ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;} - h3{color:#003366;font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;} - h4{color:#003366;font-size: x-small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;} - .date{color:#333333; font-size:xx-small; font-family:Arial,Helvetica,sans-serif; font-style:italic} - a{color:#339966;} - body{font-family:Georgia,Times New Roman,Times,serif; font-size:x-small;color:#333333;} - ''' - - feeds = [(u'Posts', u'http://feeds.feedburner.com/climateprogress/lCrX')] - - def preprocess_html(self, soup): - soup.html['lang'] = self.lang - soup.html['dir'] = self.direction - mlang = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) - soup.head.insert(0, mlang) - soup.head.insert(1, mcharset) - return self.adeify_images(soup) diff --git a/recipes/coding_horror.recipe b/recipes/coding_horror.recipe deleted file mode 100644 index 83a33220ff..0000000000 --- a/recipes/coding_horror.recipe +++ /dev/null @@ -1,33 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2012, Darko Miletic ' -''' -www.codinghorror.com/blog/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class CodingHorror(BasicNewsRecipe): - title = 'Coding Horror' - __author__ = 'Darko Miletic' - description = 'programming and human factors - Jeff Atwood' - category = 'blog, programming' - publisher = 'Jeff Atwood' - language = 'en' - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = True - encoding = 'utf8' - auto_cleanup = True - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'authors': publisher - } - - remove_tags = [ - dict(name=['object', 'link']), dict( - name='div', attrs={'class': 'feedflare'}) - ] - - feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror')] diff --git a/recipes/columbusdispatch.recipe b/recipes/columbusdispatch.recipe deleted file mode 100644 index 476b57ba71..0000000000 --- a/recipes/columbusdispatch.recipe +++ /dev/null @@ -1,54 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class ColumbusDispatchRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en' - version = 1 - - title = u'The Columbus Dispatch' - publisher = u'The Columbus Dispatch' - category = u'News, Newspaper' - description = u'Daily newspaper from central Ohio' - - use_embedded_content = False - remove_empty_feeds = True - oldest_article = 1.2 - use_embedded_content = False - - no_stylesheets = True - auto_cleanup = True - # Feeds from http://www.dispatch.com/live/content/rss/index.html - feeds = [ - ('Local', - 'http://www.dispatch.com/content/syndication/news_local-state.xml'), - ('National', - 'http://www.dispatch.com/content/syndication/news_national.xml'), - ('Business', - 'http://www.dispatch.com/content/syndication/news_business.xml'), - ('Editorials', - 'http://www.dispatch.com/content/syndication/opinion_editorials.xml'), - ('Columnists', - 'http://www.dispatch.com/content/syndication/opinion_columns.xml'), - ('Life and Arts', - 'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'), - ('OSU Sports', - 'http://www.dispatch.com/content/syndication/sports_osu.xml'), - ('Auto Racing', - 'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'), - ('Outdoors', - 'http://www.dispatch.com/content/syndication/sports_outdoors.xml'), - ('Bengals', - 'http://www.dispatch.com/content/syndication/sports_bengals.xml'), - ('Indians', - 'http://www.dispatch.com/content/syndication/sports_indians.xml'), - ('Clippers', - 'http://www.dispatch.com/content/syndication/sports_clippers.xml'), - ('Crew', - 'http://www.dispatch.com/content/syndication/sports_crew.xml'), - ('Reds', - 'http://www.dispatch.com/content/syndication/sports_reds.xml'), - ('Blue Jackets', - 'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'), - ] diff --git a/recipes/computerworld_pl.recipe b/recipes/computerworld_pl.recipe deleted file mode 100644 index 256326967c..0000000000 --- a/recipes/computerworld_pl.recipe +++ /dev/null @@ -1,27 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Computerworld_pl(BasicNewsRecipe): - title = u'Computerworld.pl' - __author__ = 'fenuks' - description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne' - category = 'IT' - language = 'pl' - masthead_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' - cover_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' - no_stylesheets = True - oldest_article = 7 - max_articles_per_feed = 100 - use_embedded_content = False - preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), - (re.compile(u'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''), ] - keep_only_tags = [dict(name='article')] - remove_tags = [dict(attrs={'class': ['share_tools nocontent', 'rec']}), - dict(name='ul',attrs={'class':'tags'}), - dict(name='ol'), - dict(id=['topComment', 'bottom_tools'])] - - feeds = [(u'Wiadomo\u015bci', u'https://www.computerworld.pl/news?rss')] diff --git a/recipes/consortium_news.recipe b/recipes/consortium_news.recipe deleted file mode 100644 index 9c861dd238..0000000000 --- a/recipes/consortium_news.recipe +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python -## -# Title: Consortium News -## -# License: GNU General Public License v3 - -# http://www.gnu.org/copyleft/gpl.html - -# Feb 2012: Initial release -__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html' -''' -consortiumnews.com -''' -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ConsortiumNews(BasicNewsRecipe): - - title = u'Consortium News' - publisher = 'Copyright © 2012 Consortiumnews. All Rights Reserved.' - language = 'en' - __author__ = 'kiavash' - - oldest_article = 7 - max_articles_per_feed = 100 - - no_stylesheets = True - remove_javascript = True - - # Flattens all the tables to make it compatible with Nook - conversion_options = {'linearize_tables': True} - - remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', - 'valign', 'vspace', 'hspace', 'alt', 'width', 'height'] - - # Specify extra CSS - overrides ALL other CSS (IE. Added last). - extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ - .introduction, .first { font-weight: bold; } \ - .cross-head { font-weight: bold; font-size: 125%; } \ - .cap, .caption { display: block; font-size: 80%; font-style: italic; } \ - .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \ - .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \ - .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \ - font-size: 80%; font-style: italic; margin: 1px auto; } \ - .story-date, .published { font-size: 80%; } \ - table { width: 100%; } \ - td img { display: block; margin: 5px auto; } \ - ul { padding-top: 10px; } \ - ol { padding-top: 10px; } \ - li { padding-top: 5px; padding-bottom: 5px; } \ - h1 { font-size: 175%; font-weight: bold; } \ - h2 { font-size: 150%; font-weight: bold; } \ - h3 { font-size: 125%; font-weight: bold; } \ - h4, h5, h6 { font-size: 100%; font-weight: bold; }' - - # Remove the line breaks and float left/right and picture width/height. - preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), - (re.compile(r'', - re.IGNORECASE), lambda m: ''), - (re.compile(r'float:.*?'), lambda m: ''), - (re.compile(r'width:.*?px'), lambda m: ''), - (re.compile(r'height:.*?px'), lambda m: ''), - (re.compile(r''), lambda h1: ''), - (re.compile(r''), lambda h2: ''), - ] - - # Main article is inside this tag - keep_only_tags = [ - dict(name='div', attrs={'id': lambda x: x and 'post-' in x})] - - remove_tags = [ - # remove 'Share this Article' - dict(name='div', attrs={'class': 'sociable'}), - dict(name='p', attrs={'class': 'tags'}), # remove 'Tags: ... ' - ] - - feeds = [(u'Consortium News', u'http://feeds.feedburner.com/Consortiumnewscom')] diff --git a/recipes/contemporary_argentine_writers.recipe b/recipes/contemporary_argentine_writers.recipe deleted file mode 100644 index 93b74b8092..0000000000 --- a/recipes/contemporary_argentine_writers.recipe +++ /dev/null @@ -1,33 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2013, Darko Miletic ' -''' -contemporaryargentinewriters.wordpress.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class contemporaryargentinewriters(BasicNewsRecipe): - title = 'Contemporary Argentine Writers' - __author__ = 'Darko Miletic' - description = 'Short stories by Argentine writers (and others) translated into English' - publisher = 'Dario Bard' - category = 'fiction, literature, Argentina, english' - oldest_article = 25 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = True - language = 'en_AR' - remove_empty_feeds = True - publication_type = 'blog' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - img{margin-bottom: 0.4em; display:block} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [(u'Posts', u'http://contemporaryargentinewriters.wordpress.com/feed/')] diff --git a/recipes/corriere_della_sera_en.recipe b/recipes/corriere_della_sera_en.recipe deleted file mode 100644 index 3606d0eb7e..0000000000 --- a/recipes/corriere_della_sera_en.recipe +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini, based on Darko Miletic' -__copyright__ = '2009, Darko Miletic , Lorenzo Vigentini ' -__version__ = 'v1.02' -__date__ = '14, March 2010' -__description__ = 'Italian daily newspaper (english version)' -# NOTE: the feeds url are broken on the main site as the permalink structure has been changed erroneously ie: -# actual link in feed http://www.corriere.it/english/10_marzo_11/legitimate_impediment_approved_de9ba480-2cfd-11df-a00c-00144f02aabe.shtml -# this needs to be change to -# real feed URL -# http://www.corriere.it/International/english/articoli/2010/03/11/legitimate_impediment_approved.shtml -''' -http://www.corriere.it/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ilCorriereEn(BasicNewsRecipe): - author = 'Lorenzo Vigentini, based on Darko Miletic' - description = 'Italian daily newspaper (english version)' - - cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520' - title = u'Il Corriere della sera (english) ' - publisher = 'RCS Digital' - category = 'News, politics, culture, economy, general interest' - - language = 'en' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 5 - max_articles_per_feed = 100 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - - recipe_specific_options = { - 'days': { - 'short': 'Oldest article to download from this news source. In days ', - 'long': 'For example, 0.5, gives you articles from the past 12 hours', - 'default': str(oldest_article) - } - } - - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - d = self.recipe_specific_options.get('days') - if d and isinstance(d, str): - self.oldest_article = float(d) - - def get_article_url(self, article): - articleUrl = article.get('link') - segments = articleUrl.split('/') - basename = '/'.join(segments[:3]) + '/' + \ - 'International/english/articoli/' - - # the date has to be redone with the url structure - mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno', - 'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre'] - mlist2 = ['01', '02', '03', '04', '05', - '06', '07', '08', '09', '10', '11', '12'] - myDate = segments[4].split('_') - x = 0 - for x in range(11): - if myDate[1] == mlist1[x]: - noMonth = mlist2[x] - break - - newDateUrl = '20' + myDate[0] + '/' + noMonth + '/' + myDate[2] + '/' - - # clean the article title - articleURLseg = segments[5].split('-') - myArticle = (articleURLseg[0])[:-9] + '.shtml' - - myURL = basename + newDateUrl + myArticle - # print myURL - return myURL - - keep_only_tags = [ - dict(name='div', attrs={'class': ['news-dettaglio article', 'article']})] - - remove_tags = [ - dict(name=['base', 'object', 'link', 'embed']), - dict(name='div', attrs={'class': 'news-goback'}), - dict(name='ul', attrs={'class': 'toolbar'}) - ] - - remove_tags_after = dict(name='p', attrs={'class': 'footnotes'}) - - feeds = [ - (u'News', u'http://www.corriere.it/rss/english.xml') - ] diff --git a/recipes/corriere_dello_sport.recipe b/recipes/corriere_dello_sport.recipe deleted file mode 100644 index df743f4b00..0000000000 --- a/recipes/corriere_dello_sport.recipe +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'GabrieleMarini, based on Darko Miletic' -__copyright__ = '2009, Darko Miletic , Gabriele Marini' -__version__ = ' ' -__date__ = '14-06-2010' -__description__ = 'Italian daily newspaper' - -''' -http://www.corrieredellosport.it/ -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class ilCorrieredelloSport(BasicNewsRecipe): - __author__ = 'Gabriele Marini' - description = 'Italian daily newspaper' - - cover_url = 'http://edicola.corrieredellosport.it/newsmem/corsport/prima/nazionale_prima.jpg' - - title = u'Il Corriere dello Sport' - publisher = 'CORRIERE DELLO SPORT s.r.l. ' - category = 'Sport' - - language = 'it' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 10 - max_articles_per_feed = 100 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables' - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \ - description + '"\ntags="' + category + '"\nlinearize_tables=True' - - keep_only_tags = [ - dict(name='h1', attrs={'class': ['tit_Article']}), - dict(name='h1', attrs={'class': ['tit_Article_mondiali']}), - dict(name='div', attrs={'class': ['box_Img']}), - dict(name='p', attrs={'class': ['summary', 'text']})] - - feeds = [ - (u'Primo Piano', u'http://www.corrieredellosport.it/rss/primo_piano.xml'), - (u'Calcio', u'http://www.corrieredellosport.it/rss/Calcio-3.xml'), - (u'Formula 1', u'http://www.corrieredellosport.it/rss/Formula-1-7.xml'), - (u'Moto', u'http://www.corrieredellosport.it/rss/Moto-8.xml'), - (u'Piu visti', u'http://www.corrieredellosport.it/rss/piu_visti.xml') - ] diff --git a/recipes/cosmopolitan.recipe b/recipes/cosmopolitan.recipe deleted file mode 100644 index 23cd72b896..0000000000 --- a/recipes/cosmopolitan.recipe +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = '2010, Gustavo Azambuja ' -''' -Muy Interesante -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class General(BasicNewsRecipe): - title = 'Cosmopolitan' - __author__ = 'Gustavo Azambuja' - description = 'Revista Cosmopolitan, Edicion Espanola' - language = 'es' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 1 - encoding = 'utf8' - remove_javascript = True - no_stylesheets = True - conversion_options = {'linearize_tables': True} - - oldest_article = 180 - max_articles_per_feed = 100 - keep_only_tags = [ - dict(id=['contenido']), - dict(name='td', attrs={'class': ['contentheading', 'txt_articulo']}) - ] - remove_tags = [ - dict(name='div', attrs={'class': ['breadcrumb', 'bloque1', 'article', 'bajo_title', - 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}), - dict(name='div', attrs={'id': 'comment'}), - dict(name='table', attrs={'class': 'pagenav'}), - dict(name=['object', 'link']) - ] - remove_attributes = ['width', 'height', 'style', 'font', 'color'] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - img {float:left; clear:both; margin:10px} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - feeds = [ - (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt') - ] - - def preprocess_html(self, soup): - attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' ] # noqa - for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): - item.name = 'div' - for attrib in attribs: - item[attrib] = '' - del item[attrib] - return soup - - def get_cover_url(self): - index = 'http://www.cosmohispano.com/revista' - soup = self.index_to_soup(index) - link_item = soup.find('img', attrs={'class': 'img_portada'}) - if link_item: - cover_url = "http://www.cosmohispano.com" + link_item['src'] - return cover_url diff --git a/recipes/cosmopolitan_de.recipe b/recipes/cosmopolitan_de.recipe deleted file mode 100644 index efc7e0dd42..0000000000 --- a/recipes/cosmopolitan_de.recipe +++ /dev/null @@ -1,36 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1305567197(BasicNewsRecipe): - title = u'Cosmopolitan.de' - __author__ = 'schuster' - oldest_article = 7 - language = 'de' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - remove_javascript = True - cover_url = 'http://www.cosmopolitan.com/cm/shared/site_images/print_this/cosmopolitan_logo.gif' - remove_tags_before = dict(name='h1', attrs={'class': 'artikel'}) - remove_tags_after = dict(name='div', attrs={'class': 'morePages'}) - extra_css = ''' - h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;} - h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;} - ''' - remove_tags = [dict(id='strong'), - dict(title='strong'), - dict(name='span'), - dict(name='li', attrs={'class': 'large'}), - dict(name='ul', attrs={ - 'class': 'articleImagesPortrait clearfix'}), - dict(name='p', attrs={'class': 'external'}), - dict(name='a', attrs={'target': '_blank'}), ] - feeds = [(u'Komplett', u'http://www.cosmopolitan.de/rss/allgemein.xml'), - (u'Mode', u'http://www.cosmopolitan.de/rss/mode.xml'), - (u'Beauty', u'http://www.cosmopolitan.de/rss/beauty.xml'), - (u'Liebe&Sex', u'http://www.cosmopolitan.de/rss/liebe.xml'), - (u'Psychologie', u'http://www.cosmopolitan.de/rss/psychologie.xml'), - (u'Job&Karriere', u'http://www.cosmopolitan.de/rss/job.xml'), - (u'Lifestyle', u'http://www.cosmopolitan.de/rss/lifestyle.xml'), - (u'Shopping', u'http://www.cosmopolitan.de/rss/shopping.xml'), - (u'Bildergalerien', u'http://www.cosmopolitan.de/rss/bildgalerien.xml')] diff --git a/recipes/cosmopolitan_uk.recipe b/recipes/cosmopolitan_uk.recipe deleted file mode 100644 index 17f184c883..0000000000 --- a/recipes/cosmopolitan_uk.recipe +++ /dev/null @@ -1,41 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1390635673(BasicNewsRecipe): - title = u'Cosmopolitan UK' - description = 'Womens Fashion, beauty and Gossip for women from COSMOPOLITAN -UK' - language = 'en_GB' - - __author__ = 'Dave Asbury' - # 2/2/14 - oldest_article = 28 - max_articles_per_feed = 10 - compress_news_images = True - compress_news_images_max_size = 20 - auto_cleanup_keep = '//div[@class="articleHeading"]' - auto_cleanup = True - ignore_duplicate_articles = {'title', 'url'} - no_stylesheets = True - masthead_url = 'http://www.cosmopolitan.co.uk//cm/cosmopolitanuk/site_images/site_logo.gif' - cover_url = 'http://www.natmagnewsletters.co.uk/CIRCULES/CosmoXXLCover.jpg' - # kovids code - - def preprocess_raw_html(self, raw_html, url): - for pat, f in [ - (re.compile(r':: [\w].+', - re.DOTALL), lambda m: ''), - - ]: - raw_html = pat.sub(f, raw_html) - return raw_html - - feeds = [ - (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), - (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), - (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), - (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), - (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), - (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), - (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')] diff --git a/recipes/cotidianul.recipe b/recipes/cotidianul.recipe deleted file mode 100644 index b300f02903..0000000000 --- a/recipes/cotidianul.recipe +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -cotidianul.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Cotidianul(BasicNewsRecipe): - title = u'Cotidianul' - __author__ = u'Silviu Cotoar\u0103' - description = u'' - publisher = u'Cotidianul' - oldest_article = 25 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri' - encoding = 'utf-8' - cover_url = 'http://www.cotidianul.ro/images/cotidianul.png' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} - .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center} - .story{font-family:Arial,Helvetica,sans-serif;font-size:small;} - .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;} - .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;} - .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} - .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' - - keep_only_tags = [ - dict(name='div', attrs={'class': 'titlu'}), dict(name='div', attrs={ - 'class': 'gallery clearfix'}), dict(name='div', attrs={'align': 'justify'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['space']}), dict( - name='div', attrs={'id': ['title_desc']}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': ['space']}), dict( - name='span', attrs={'class': ['date']}) - ] - - feeds = [ - (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/counterpunch.recipe b/recipes/counterpunch.recipe deleted file mode 100644 index d32d6a912b..0000000000 --- a/recipes/counterpunch.recipe +++ /dev/null @@ -1,11 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Counterpunch(BasicNewsRecipe): - title = u'Counterpunch' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - language = 'en' - - feeds = [(u'Counterpunch', u'http://www.counterpunch.org/category/article/feed/')] diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe deleted file mode 100644 index 8ce935d737..0000000000 --- a/recipes/countryfile.recipe +++ /dev/null @@ -1,44 +0,0 @@ -import re - -from calibre import browser -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1325006965(BasicNewsRecipe): - title = u'Countryfile.com' - __author__ = 'Dave Asbury' - description = 'The official website of Countryfile Magazine' - # last updated 24.10.14 - language = 'en_GB' - oldest_article = 30 - max_articles_per_feed = 25 - remove_empty_feeds = True - no_stylesheets = True - auto_cleanup = True - compress_news_images = True - ignore_duplicate_articles = {'title', 'url'} - - def get_cover_url(self): - soup = self.index_to_soup('http://www.countryfile.com/magazine') - cov = soup.find(attrs={'class': re.compile( - 'imagecache imagecache-250px')}) # 'width' : '160', - cov = str(cov) - cov = cov[10:] - cov = cov[:-135] - # print '++++ ',cov,' ++++' - br = browser() - - br.set_handle_redirect(False) - try: - br.open_novisit(cov) - cover_url = cov - except: - cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg' - return cover_url - - preprocess_regexps = [ - (re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL), lambda match: '')] - feeds = [ - (u'Country News', u'http://www.feed43.com/7204505705648666.xml'), - (u'Articles', u'http://www.feed43.com/8542080013204443.xml'), - ] diff --git a/recipes/courier_mail.recipe b/recipes/courier_mail.recipe deleted file mode 100644 index 0823fc7289..0000000000 --- a/recipes/courier_mail.recipe +++ /dev/null @@ -1,33 +0,0 @@ -import datetime - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Politics(BasicNewsRecipe): - title = u'The Courier-Mail' - description = 'Breaking news headlines for Brisbane and Queensland, Australia. The Courier-Mail is owned by News Corp Australia.' - language = 'en_AU' - __author__ = 'Krittika Goyal, James Cridland' - oldest_article = 3 # days - max_articles_per_feed = 20 - use_embedded_content = False - - d = datetime.datetime.today() - cover_url='http://mfeeds.news.com.au/smedia/NCCOURIER/NCCM_1_' + d.strftime('%Y_%m_%d') + '_thumb_big.jpg' - masthead_url='https://couriermail.digitaleditions.com.au/images/couriermail-logo.jpg' - - no_stylesheets = True - auto_cleanup = True - handle_gzip = True - - feeds = [ - ('Top Stories', 'http://www.couriermail.com.au/rss'), - ('Breaking', 'https://www.couriermail.com.au/news/breaking-news/rss'), - ('Queensland', 'https://www.couriermail.com.au/news/queensland/rss'), - ('Technology', 'https://www.couriermail.com.au/technology/rss'), - ('Entertainment', 'https://www.couriermail.com.au/entertainment/rss'), - ('Finance','https://www.couriermail.com.au/business/rss'), - ('Sport', 'https://www.couriermail.com.au/sport/rss'), - ] - -# This isn't perfect, but works rather better than it once did. To do - remove links to subscription content. diff --git a/recipes/courrier.recipe b/recipes/courrier.recipe deleted file mode 100644 index 6e562ce8b9..0000000000 --- a/recipes/courrier.recipe +++ /dev/null @@ -1,28 +0,0 @@ -from calibre.ebooks.BeautifulSoup import BeautifulSoup -from calibre.web.feeds.news import BasicNewsRecipe - - -class CourierPress(BasicNewsRecipe): - title = u'Courier Press' - language = 'en' - __author__ = 'Krittika Goyal' - oldest_article = 1 # days - max_articles_per_feed = 25 - - remove_stylesheets = True - remove_tags = [ - dict(name='iframe'), - ] - - feeds = [ - ('Courier Press', - 'http://www.courierpress.com/rss/headlines/news/'), - ] - - def preprocess_html(self, soup): - story = soup.find(name='div', attrs={'id': 'article_body'}) - soup = BeautifulSoup( - 't') - body = soup.find(name='body') - body.insert(0, story) - return soup diff --git a/recipes/craigslist.recipe b/recipes/craigslist.recipe deleted file mode 100644 index 8630a0b8ad..0000000000 --- a/recipes/craigslist.recipe +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class CraigsList(BasicNewsRecipe): - title = u'craigslist - Best Of' - oldest_article = 365 - max_articles_per_feed = 100 - language = 'en' - - __author__ = 'kiodane' - - feeds = [(u'Best of craigslist', - u'http://www.craigslist.org/about/best/all/index.rss'), ] diff --git a/recipes/credit_slips.recipe b/recipes/credit_slips.recipe deleted file mode 100644 index be5b73b942..0000000000 --- a/recipes/credit_slips.recipe +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL 3' -__copyright__ = 'zotzo' -__docformat__ = 'restructuredtext en' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class CreditSlips(BasicNewsRecipe): - language = 'en' - __author__ = 'zotzot' - version = 2 - title = u'Credit Slips.org' - publisher = u'Bankr-L' - category = u'Economic blog' - description = u'A discussion on credit and bankruptcy' - cover_url = 'http://bit.ly/eAKNCB' - oldest_article = 15 - max_articles_per_feed = 100 - use_embedded_content = True - no_stylesheets = True - remove_javascript = True - - conversion_options = { - 'comments': description, - 'tags': category, - 'language': 'en', - 'publisher': publisher, - } - - feeds = [ - (u'Credit Slips', u'http://www.creditslips.org/creditslips/atom.xml') - ] - - extra_css = ''' - .author {font-family:Helvetica,sans-serif; font-weight:normal;font-size:small;} - h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - p {font-family:Helvetica,Arial,sans-serif;font-size:small;} - body {font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' - - def populate_article_metadata(self, article, soup, first): - h2 = soup.find('h2') - h2.replaceWith(h2.prettify() + '

Posted by ' + - article.author + '

') diff --git a/recipes/cronica.recipe b/recipes/cronica.recipe deleted file mode 100644 index 7a7ba02b99..0000000000 --- a/recipes/cronica.recipe +++ /dev/null @@ -1,79 +0,0 @@ -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2018, Darko Miletic ' -''' -www.cronica.com.ar -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Veintitres(BasicNewsRecipe): - title = 'Cronica' - __author__ = 'Darko Miletic' - description = 'Últimas noticias' - publisher = 'Grupo Crónica' - category = 'politica, noticias generales, Argentina' - oldest_article = 15 - max_articles_per_feed = 100 - no_stylesheets = False - use_embedded_content = False - encoding = 'utf-8' - masthead_url = 'https://www.cronica.com.ar/export/sites/cronica/arte/logos/logoCronica.svg_799932565.svg' - language = 'es_AR' - remove_javascript = True - publication_type = 'magazine' - remove_empty_feeds = True - auto_cleanup = True - auto_cleanup_keep = '//h1' - resolve_internal_links = True - INDEX = "https://www.cronica.com.ar" - extra_css = """ - img{margin-bottom: 0.8em} - """ - - conversion_options = { - 'comment': description, - 'tags': category, - 'publisher': publisher, - 'language': language - } - - feeds = [ - (u'Policiales', u'https://www.cronica.com.ar/seccion/policiales/'), - (u'Politica', u'https://www.cronica.com.ar/seccion/politica/'), - (u'General', u'https://www.cronica.com.ar/seccion/info-general/'), - (u'Mundo', u'https://www.cronica.com.ar/seccion/mundo/'), - (u'Opinion', u'https://www.cronica.com.ar/seccion/opinion/'), - (u'Deportes', u'https://www.cronica.com.ar/seccion/deportes/'), - (u'Cosa de locos', u'https://www.cronica.com.ar/seccion/cosa-de-locos/'), - (u'Espectaculos', u'https://www.diarioshow.com/seccion/espectaculos/'), - ] - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress( - 0, - _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl) - ) - articles = [] - soup = self.index_to_soup(feedurl) - for item in soup.findAll('a', attrs={'class': 'cover-link'}): - url = self.INDEX + item['href'] - if feedtitle == 'Espectaculos': - url = 'https://www.diarioshow.com' + item['href'] - title = item['title'] - articles.append({ - 'title': title, - 'date': '', - 'url': url, - 'description': '' - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/curierulnational.recipe b/recipes/curierulnational.recipe deleted file mode 100644 index 59c845866e..0000000000 --- a/recipes/curierulnational.recipe +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -curierulnational.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class CurierulNal(BasicNewsRecipe): - title = u'Curierul Na\u0163ional' - __author__ = u'Silviu Cotoar\u0103' - description = '' - publisher = 'Curierul Na\u0163ional' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri' - encoding = 'utf-8' - cover_url = 'http://www.curierulnational.ro/logo.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'id': 'col1'}), dict( - name='img', attrs={'id': 'placeholder'}) - ] - - remove_tags = [ - dict(name='p', attrs={'id': ['alteArticole']}), dict(name='div', attrs={'id': ['textSize']}), dict( - name='ul', attrs={'class': ['unit-rating']}), dict(name='div', attrs={'id': ['comments']}) - ] - - remove_tags_after = [ - dict(name='ul', attrs={'class': 'unit-rating'}) - ] - - feeds = [ - (u'Feeds', u'http://www.curierulnational.ro/feed.xml') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/cyberpresse.recipe b/recipes/cyberpresse.recipe deleted file mode 100644 index 7c0882055f..0000000000 --- a/recipes/cyberpresse.recipe +++ /dev/null @@ -1,56 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Cyberpresse(BasicNewsRecipe): - - title = u'Cyberpresse' - __author__ = 'balok and Sujata Raman' - description = 'Canadian news in French' - language = 'fr' - - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_javascript = True - html2lrf_options = ['--left-margin=0', '--right-margin=0', - '--top-margin=0', '--bottom-margin=0'] - encoding = 'utf-8' - - keep_only_tags = [dict(name='div', attrs={'class': 'article-page'}), - dict(name='div', attrs={'id': 'articlePage'}), - ] - - extra_css = ''' - .photodata{font-family:Arial,Helvetica,Verdana,sans-serif;color: #999999; font-size: 90%; } - h1{font-family:Georgia,Times,serif ; font-size: large; } - .amorce{font-family:Arial,Helvetica,Verdana,sans-serif; font-weight:bold;} - .article-page{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;} - #articlePage{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;} - .auteur{font-family:Georgia,Times,sans-serif; font-size: 90%; color:#006699 ;} - .bodyText{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;} - .byLine{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%;} - .entry{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;} - .minithumb-auteurs{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%; } - a{color:#003399; font-weight:bold; } - ''' - - remove_tags = [ - dict(name='div', attrs={ - 'class': ['centerbar', 'colspan', 'share-module']}), - dict(name='p', attrs={'class': ['zoom']}), - dict(name='ul', attrs={'class': ['stories']}), - dict(name='h4', attrs={'class': ['general-cat']}), - ] - - feeds = [(u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'), - (u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'), - (u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'), - (u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml') - ] - - def postprocess_html(self, soup, first): - - for tag in soup.findAll(name=['i', 'strong']): - tag.name = 'div' - - return soup diff --git a/recipes/cynewslive.recipe b/recipes/cynewslive.recipe deleted file mode 100644 index c05a35c8c0..0000000000 --- a/recipes/cynewslive.recipe +++ /dev/null @@ -1,118 +0,0 @@ -from datetime import datetime, timedelta - -from calibre.web.feeds.news import BasicNewsRecipe - - -class CyNewsLiveRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en_CY' - version = 1 - - title = u'Cyprus News Live' - publisher = u'The Cyprus Weekly' - category = u'News, Newspaper' - description = u'News from Cyprus' - - use_embedded_content = False - remove_empty_feeds = True - oldest_article = 7 - max_articles_per_feed = 100 - - no_stylesheets = True - remove_javascript = True - - pubTime = None - minTime = None - articleCount = 0 - - INDEX = 'http://www.cynewslive.com' - - feeds = [] - feeds.append( - ('News: Cyprus', 'http://www.cynewslive.com/main/92,0,0,0-CYPRUS.aspx')) - feeds.append( - ('News: World', 'http://www.cynewslive.com/main/78,0,0,0-UKWORLD.aspx')) - feeds.append( - ('Sport: Football', 'http://www.cynewslive.com/main/82,0,0,0-FOOTBALL.aspx')) - feeds.append( - ('Sport: Rugby', 'http://www.cynewslive.com/main/83,0,0,0-RUGBY.aspx')) - feeds.append( - ('Sport: Cricket', 'http://www.cynewslive.com/main/85,0,0,0-CRICKET.aspx')) - feeds.append( - ('Sport: Tennis', 'http://www.cynewslive.com/main/84,0,0,0-TENNIS.aspx')) - feeds.append( - ('Sport: Other', 'http://www.cynewslive.com/main/86,0,0,0-OTHER.aspx')) - feeds.append( - ('Business: Local', 'http://www.cynewslive.com/main/100,0,0,0-LOCAL.aspx')) - feeds.append( - ('Business: Foreign', 'http://www.cynewslive.com/main/101,0,0,0-FOREIGN.aspx')) - feeds.append( - ('Environment', 'http://www.cynewslive.com/main/93,0,0,0-ENVIRONMENT.aspx')) - feeds.append( - ('Culture', 'http://www.cynewslive.com/main/208,0,0,0-CULTURE.aspx')) - - keep_only_tags = [] - keep_only_tags.append( - dict(name='div', attrs={'class': 'ArticleCategories'})) - - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif ;} - ''' - - def parse_index(self): - answer = [] - for feed in self.feeds: - self.articleCount = 0 - articles = [] - soup = self.index_to_soup(feed[1]) - - table = soup.find('table', attrs={'id': 'ctl00_cp_ctl01_listp'}) - if table: - self.pubTime = datetime.now() - self.minTime = self.pubTime - \ - timedelta(days=self.oldest_article) - - self.find_articles(table, articles) - - answer.append((feed[0], articles)) - - return answer - - def postprocess_html(self, soup, first): - for el in soup.findAll(attrs={'style': True}): - del el['style'] - - for el in soup.findAll('font'): - el.name = 'div' - for attr, value in el: - del el[attr] - - return soup - - def find_articles(self, table, articles): - for div in table.findAll('div', attrs={'class': 'ListArticle'}): - el = div.find('div', attrs={'class': 'ListArticle_T'}) - title = self.tag_to_string(el.a) - url = self.INDEX + el.a['href'] - - description = self.tag_to_string( - div.find('div', attrs={'class': 'ListArticle_BODY300'})) - - el = div.find('div', attrs={'class': 'ListArticle_D'}) - if el: - dateParts = self.tag_to_string(el).split(' ') - monthNames = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, - 'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, - 'December': 12} - timeParts = dateParts[3].split(':') - self.pubTime = datetime(year=int(dateParts[2]), month=int(monthNames[dateParts[1]]), - day=int(dateParts[0]), hour=int(timeParts[0]), - minute=int(timeParts[1])) - - if self.pubTime >= self.minTime and self.articleCount <= self.max_articles_per_feed: - articles.append( - {'title': title, 'date': self.pubTime, 'url': url, 'description': description}) - self.articleCount += 1 - else: - return diff --git a/recipes/icons/balkanist.png b/recipes/icons/balkanist.png deleted file mode 100644 index 72c6d25b9e..0000000000 Binary files a/recipes/icons/balkanist.png and /dev/null differ diff --git a/recipes/icons/bangkok_biz.png b/recipes/icons/bangkok_biz.png deleted file mode 100644 index d531a38ee6..0000000000 Binary files a/recipes/icons/bangkok_biz.png and /dev/null differ diff --git a/recipes/icons/bay_citizen.png b/recipes/icons/bay_citizen.png deleted file mode 100644 index 997b74038f..0000000000 Binary files a/recipes/icons/bay_citizen.png and /dev/null differ diff --git a/recipes/icons/beppe_grillo.png b/recipes/icons/beppe_grillo.png deleted file mode 100644 index b6c3414584..0000000000 Binary files a/recipes/icons/beppe_grillo.png and /dev/null differ diff --git a/recipes/icons/berliner_zeitung.png b/recipes/icons/berliner_zeitung.png deleted file mode 100644 index f8f681ba2a..0000000000 Binary files a/recipes/icons/berliner_zeitung.png and /dev/null differ diff --git a/recipes/icons/berlingske_dk.png b/recipes/icons/berlingske_dk.png deleted file mode 100644 index 456346fb4a..0000000000 Binary files a/recipes/icons/berlingske_dk.png and /dev/null differ diff --git a/recipes/icons/bighollywood.png b/recipes/icons/bighollywood.png deleted file mode 100644 index 4af5ec55b2..0000000000 Binary files a/recipes/icons/bighollywood.png and /dev/null differ diff --git a/recipes/icons/birmingham_post.png b/recipes/icons/birmingham_post.png deleted file mode 100644 index e899d9e4b5..0000000000 Binary files a/recipes/icons/birmingham_post.png and /dev/null differ diff --git a/recipes/icons/biz_portal.png b/recipes/icons/biz_portal.png deleted file mode 100644 index 170ec43d99..0000000000 Binary files a/recipes/icons/biz_portal.png and /dev/null differ diff --git a/recipes/icons/boortz.png b/recipes/icons/boortz.png deleted file mode 100644 index 1c9e0680a0..0000000000 Binary files a/recipes/icons/boortz.png and /dev/null differ diff --git a/recipes/icons/borse_online.png b/recipes/icons/borse_online.png deleted file mode 100644 index 6fa8875650..0000000000 Binary files a/recipes/icons/borse_online.png and /dev/null differ diff --git a/recipes/icons/brand_eins.png b/recipes/icons/brand_eins.png deleted file mode 100644 index 2064fb2ff5..0000000000 Binary files a/recipes/icons/brand_eins.png and /dev/null differ diff --git a/recipes/icons/brasil_de_fato.png b/recipes/icons/brasil_de_fato.png deleted file mode 100644 index f19a37594f..0000000000 Binary files a/recipes/icons/brasil_de_fato.png and /dev/null differ diff --git a/recipes/icons/brecha.png b/recipes/icons/brecha.png deleted file mode 100644 index 90eb75ba3f..0000000000 Binary files a/recipes/icons/brecha.png and /dev/null differ diff --git a/recipes/icons/bsi_news.png b/recipes/icons/bsi_news.png deleted file mode 100644 index 01f884391b..0000000000 Binary files a/recipes/icons/bsi_news.png and /dev/null differ diff --git a/recipes/icons/buchreport.png b/recipes/icons/buchreport.png deleted file mode 100644 index f14a105f3e..0000000000 Binary files a/recipes/icons/buchreport.png and /dev/null differ diff --git a/recipes/icons/buckmasters.png b/recipes/icons/buckmasters.png deleted file mode 100644 index 98b85a85eb..0000000000 Binary files a/recipes/icons/buckmasters.png and /dev/null differ diff --git a/recipes/icons/buenosaireseconomico.png b/recipes/icons/buenosaireseconomico.png deleted file mode 100644 index a3185f7088..0000000000 Binary files a/recipes/icons/buenosaireseconomico.png and /dev/null differ diff --git a/recipes/icons/buffalonews.png b/recipes/icons/buffalonews.png deleted file mode 100644 index ab40b9e253..0000000000 Binary files a/recipes/icons/buffalonews.png and /dev/null differ diff --git a/recipes/icons/businessworldin.png b/recipes/icons/businessworldin.png deleted file mode 100644 index b20e729f6a..0000000000 Binary files a/recipes/icons/businessworldin.png and /dev/null differ diff --git a/recipes/icons/cafcaf_dergisi.png b/recipes/icons/cafcaf_dergisi.png deleted file mode 100644 index 46529cb072..0000000000 Binary files a/recipes/icons/cafcaf_dergisi.png and /dev/null differ diff --git a/recipes/icons/camera_di_commercio_di_bari.png b/recipes/icons/camera_di_commercio_di_bari.png deleted file mode 100644 index 8884148798..0000000000 Binary files a/recipes/icons/camera_di_commercio_di_bari.png and /dev/null differ diff --git a/recipes/icons/capital.png b/recipes/icons/capital.png deleted file mode 100644 index 6922c7a3ff..0000000000 Binary files a/recipes/icons/capital.png and /dev/null differ diff --git a/recipes/icons/capital_de.png b/recipes/icons/capital_de.png deleted file mode 100644 index 1d7343d2c2..0000000000 Binary files a/recipes/icons/capital_de.png and /dev/null differ diff --git a/recipes/icons/caravan_magazine_hindi.png b/recipes/icons/caravan_magazine_hindi.png deleted file mode 100644 index fdf7bb4bf5..0000000000 Binary files a/recipes/icons/caravan_magazine_hindi.png and /dev/null differ diff --git a/recipes/icons/carta.png b/recipes/icons/carta.png deleted file mode 100644 index d1b55b368b..0000000000 Binary files a/recipes/icons/carta.png and /dev/null differ diff --git a/recipes/icons/catholic_daily_readings.png b/recipes/icons/catholic_daily_readings.png deleted file mode 100644 index a602ffe118..0000000000 Binary files a/recipes/icons/catholic_daily_readings.png and /dev/null differ diff --git a/recipes/icons/cd_action.png b/recipes/icons/cd_action.png deleted file mode 100644 index ac72cb15c3..0000000000 Binary files a/recipes/icons/cd_action.png and /dev/null differ diff --git a/recipes/icons/cdrinfo_pl.png b/recipes/icons/cdrinfo_pl.png deleted file mode 100644 index fe7dd0dedc..0000000000 Binary files a/recipes/icons/cdrinfo_pl.png and /dev/null differ diff --git a/recipes/icons/ceskapozice.png b/recipes/icons/ceskapozice.png deleted file mode 100644 index b9af006521..0000000000 Binary files a/recipes/icons/ceskapozice.png and /dev/null differ diff --git a/recipes/icons/cesky_rozhlas_6.png b/recipes/icons/cesky_rozhlas_6.png deleted file mode 100644 index 00c62b6454..0000000000 Binary files a/recipes/icons/cesky_rozhlas_6.png and /dev/null differ diff --git a/recipes/icons/cgm_pl.png b/recipes/icons/cgm_pl.png deleted file mode 100644 index 711ec60617..0000000000 Binary files a/recipes/icons/cgm_pl.png and /dev/null differ diff --git a/recipes/icons/chetnixploitation.png b/recipes/icons/chetnixploitation.png deleted file mode 100644 index 04c29c68dd..0000000000 Binary files a/recipes/icons/chetnixploitation.png and /dev/null differ diff --git a/recipes/icons/chicago_breaking_news.png b/recipes/icons/chicago_breaking_news.png deleted file mode 100644 index 5035e73fae..0000000000 Binary files a/recipes/icons/chicago_breaking_news.png and /dev/null differ diff --git a/recipes/icons/china_economic_net.png b/recipes/icons/china_economic_net.png deleted file mode 100644 index aaa81c8b4b..0000000000 Binary files a/recipes/icons/china_economic_net.png and /dev/null differ diff --git a/recipes/icons/china_times.png b/recipes/icons/china_times.png deleted file mode 100644 index 22cfdfff1f..0000000000 Binary files a/recipes/icons/china_times.png and /dev/null differ diff --git a/recipes/icons/chipro.png b/recipes/icons/chipro.png deleted file mode 100644 index 77ef6fdbd6..0000000000 Binary files a/recipes/icons/chipro.png and /dev/null differ diff --git a/recipes/icons/chosun.png b/recipes/icons/chosun.png deleted file mode 100644 index 58a17eea2d..0000000000 Binary files a/recipes/icons/chosun.png and /dev/null differ diff --git a/recipes/icons/cinco_dias.png b/recipes/icons/cinco_dias.png deleted file mode 100644 index ebba30cf02..0000000000 Binary files a/recipes/icons/cinco_dias.png and /dev/null differ diff --git a/recipes/icons/cinebel_be.png b/recipes/icons/cinebel_be.png deleted file mode 100644 index f4379714a9..0000000000 Binary files a/recipes/icons/cinebel_be.png and /dev/null differ diff --git a/recipes/icons/cio.png b/recipes/icons/cio.png deleted file mode 100644 index 1b34faa85f..0000000000 Binary files a/recipes/icons/cio.png and /dev/null differ diff --git a/recipes/icons/cio_magazine.png b/recipes/icons/cio_magazine.png deleted file mode 100644 index 576eaf91e5..0000000000 Binary files a/recipes/icons/cio_magazine.png and /dev/null differ diff --git a/recipes/icons/cityavisen_dk.png b/recipes/icons/cityavisen_dk.png deleted file mode 100644 index 6fc915ca3d..0000000000 Binary files a/recipes/icons/cityavisen_dk.png and /dev/null differ diff --git a/recipes/icons/cjr.png b/recipes/icons/cjr.png deleted file mode 100644 index d780d82759..0000000000 Binary files a/recipes/icons/cjr.png and /dev/null differ diff --git a/recipes/icons/clarion_ledger.png b/recipes/icons/clarion_ledger.png deleted file mode 100644 index 0087d74754..0000000000 Binary files a/recipes/icons/clarion_ledger.png and /dev/null differ diff --git a/recipes/icons/clic_rbs.png b/recipes/icons/clic_rbs.png deleted file mode 100644 index 3dae6f3730..0000000000 Binary files a/recipes/icons/clic_rbs.png and /dev/null differ diff --git a/recipes/icons/coding_horror.png b/recipes/icons/coding_horror.png deleted file mode 100644 index 1a2db43246..0000000000 Binary files a/recipes/icons/coding_horror.png and /dev/null differ diff --git a/recipes/icons/columbusdispatch.png b/recipes/icons/columbusdispatch.png deleted file mode 100644 index ba4f2250ba..0000000000 Binary files a/recipes/icons/columbusdispatch.png and /dev/null differ diff --git a/recipes/icons/computerworld_pl.png b/recipes/icons/computerworld_pl.png deleted file mode 100644 index f478419743..0000000000 Binary files a/recipes/icons/computerworld_pl.png and /dev/null differ diff --git a/recipes/icons/consortium_news.png b/recipes/icons/consortium_news.png deleted file mode 100644 index 2ceed69a46..0000000000 Binary files a/recipes/icons/consortium_news.png and /dev/null differ diff --git a/recipes/icons/contemporary_argentine_writers.png b/recipes/icons/contemporary_argentine_writers.png deleted file mode 100644 index 6e1793fb9d..0000000000 Binary files a/recipes/icons/contemporary_argentine_writers.png and /dev/null differ diff --git a/recipes/icons/corriere_della_sera_en.png b/recipes/icons/corriere_della_sera_en.png deleted file mode 100644 index 90df7f4d16..0000000000 Binary files a/recipes/icons/corriere_della_sera_en.png and /dev/null differ diff --git a/recipes/icons/corriere_dello_sport.png b/recipes/icons/corriere_dello_sport.png deleted file mode 100644 index 08d1c861b0..0000000000 Binary files a/recipes/icons/corriere_dello_sport.png and /dev/null differ diff --git a/recipes/icons/cosmopolitan.png b/recipes/icons/cosmopolitan.png deleted file mode 100644 index 7a9a423183..0000000000 Binary files a/recipes/icons/cosmopolitan.png and /dev/null differ diff --git a/recipes/icons/cosmopolitan_de.png b/recipes/icons/cosmopolitan_de.png deleted file mode 100644 index d86aa017c8..0000000000 Binary files a/recipes/icons/cosmopolitan_de.png and /dev/null differ diff --git a/recipes/icons/cosmopolitan_uk.png b/recipes/icons/cosmopolitan_uk.png deleted file mode 100644 index f73fbf673f..0000000000 Binary files a/recipes/icons/cosmopolitan_uk.png and /dev/null differ diff --git a/recipes/icons/cotidianul.png b/recipes/icons/cotidianul.png deleted file mode 100644 index d3cfcf6451..0000000000 Binary files a/recipes/icons/cotidianul.png and /dev/null differ diff --git a/recipes/icons/counterpunch.png b/recipes/icons/counterpunch.png deleted file mode 100644 index ba299c016e..0000000000 Binary files a/recipes/icons/counterpunch.png and /dev/null differ diff --git a/recipes/icons/countryfile.png b/recipes/icons/countryfile.png deleted file mode 100644 index 10f1e9be82..0000000000 Binary files a/recipes/icons/countryfile.png and /dev/null differ diff --git a/recipes/icons/courier_mail.png b/recipes/icons/courier_mail.png deleted file mode 100644 index 6478f64f49..0000000000 Binary files a/recipes/icons/courier_mail.png and /dev/null differ diff --git a/recipes/icons/courrier.png b/recipes/icons/courrier.png deleted file mode 100644 index 3bf9224330..0000000000 Binary files a/recipes/icons/courrier.png and /dev/null differ diff --git a/recipes/icons/credit_slips.png b/recipes/icons/credit_slips.png deleted file mode 100644 index e5aa786bf4..0000000000 Binary files a/recipes/icons/credit_slips.png and /dev/null differ diff --git a/recipes/icons/cronica.png b/recipes/icons/cronica.png deleted file mode 100644 index e0e9680da8..0000000000 Binary files a/recipes/icons/cronica.png and /dev/null differ diff --git a/recipes/icons/curierulnational.png b/recipes/icons/curierulnational.png deleted file mode 100644 index 62262e12e3..0000000000 Binary files a/recipes/icons/curierulnational.png and /dev/null differ diff --git a/recipes/icons/cyberpresse.png b/recipes/icons/cyberpresse.png deleted file mode 100644 index f3789fbceb..0000000000 Binary files a/recipes/icons/cyberpresse.png and /dev/null differ