diff --git a/recipes/f1_ultra.recipe b/recipes/f1_ultra.recipe deleted file mode 100644 index 030f8acf78..0000000000 --- a/recipes/f1_ultra.recipe +++ /dev/null @@ -1,40 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class f1ultra(BasicNewsRecipe): - title = u'Formuła 1 - F1 ultra' - __license__ = 'GPL v3' - __author__ = 'MrStefan , Artur Stachecki ' - language = 'pl' - description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.' - masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif' - remove_empty_feeds = True - oldest_article = 1 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - - keep_only_tags = [(dict(name='div', attrs={'id': 'main'}))] - remove_tags_after = [ - dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'})] - remove_tags = [ - (dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}))] - remove_tags.append(dict(attrs={'title': ['PDF', 'Drukuj', 'Email']})) - remove_tags.append(dict(name='form', attrs={'method': 'post'})) - remove_tags.append(dict(name='hr', attrs={'size': '2'})) - - preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''), - (re.compile(r'align="right"'), lambda match: ''), - (re.compile(r'width=\"*\"'), lambda match: ''), - (re.compile(r'\'), lambda match: '')] - - extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; } - img { display: block; clear: both;} - ''' - remove_attributes = ['width', 'height', 'position', 'float', - 'padding-left', 'padding-right', 'padding', 'text-align'] - - feeds = [ - (u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')] diff --git a/recipes/f_secure.recipe b/recipes/f_secure.recipe deleted file mode 100644 index 8b786acc8f..0000000000 --- a/recipes/f_secure.recipe +++ /dev/null @@ -1,23 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1301860159(BasicNewsRecipe): - title = u'F-Secure Weblog' - language = 'en' - __author__ = 'louhike' - description = u'All the news from the weblog of F-Secure' - publisher = u'F-Secure' - timefmt = ' [%a, %d %b, %Y]' - encoding = 'ISO-8859-1' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - remove_javascript = True - keep_only_tags = [dict(name='div', attrs={'class': 'modSectionTd2'})] - remove_tags = [dict(name='hr')] - - feeds = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')] - - def get_cover_url(self): - return 'http://www.f-secure.com/weblog/archives/images/company_logo.png' diff --git a/recipes/favrskovavisen_dk.recipe b/recipes/favrskovavisen_dk.recipe deleted file mode 100644 index cb82bed688..0000000000 --- a/recipes/favrskovavisen_dk.recipe +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Favrskov Avisen -''' - - -class FavrskovAvisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Favrskov Avisen' - description = 'Lokale og regionale nyheder' - category = 'newspaper, news, localnews, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Nyheder', 'http://dinby.dk/favrskov-avisen/rss'), - ] - diff --git a/recipes/favrskovlokalavisen_dk.recipe b/recipes/favrskovlokalavisen_dk.recipe deleted file mode 100644 index a74e8ac4d4..0000000000 --- a/recipes/favrskovlokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Favrskovposten -''' - - -class FavrskovLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Favrskovposten' - description = 'Lokale og regionale nyheder, sport og kultur fra Favrskov og omegn på favrskov.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Favrskovposten', 'http://favrskov.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/faznet.recipe b/recipes/faznet.recipe deleted file mode 100644 index cc59609f70..0000000000 --- a/recipes/faznet.recipe +++ /dev/null @@ -1,134 +0,0 @@ -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -__license__ = 'GPL v3' -__copyright__ = '2008-2011, Kovid Goyal , Darko Miletic ' - - -class FazNet(BasicNewsRecipe): - # Version 9.1 - # Update 2022-05-29 - # Armin Geller - # new page layout - - title = 'FAZ.NET' - __author__ = 'Kovid Goyal, Darko Miletic, Armin Geller' - description = 'Frankfurter Allgemeine Zeitung' - publisher = 'Frankfurter Allgemeine Zeitung GmbH' - category = 'news, politics, Germany' - - encoding = 'utf-8' - language = 'de' - - max_articles_per_feed = 30 - no_stylesheets = True - remove_javascript = True - - extra_css = ''' - .atc-headlineemphasis, h1, h2 {font-size:1.6em; text-align:left} - .atc-HeadlineEmphasisText {font-size:0.6em; text-align:left; display:block; text-transform:uppercase;} - .atc-IntroText {font-size:1em; font-style:italic; font-weight:bold;margin-bottom:1em} - h3 {font-size:1.3em;text-align:left} - h4, h5, h6 {font-size:1em;text-align:left} - .textbox-wide {font-size:1.3em; font-style:italic} - .atc-ImageDescriptionText, .atc-ImageDescriptionCopyright {font-size: 0.75em; font-style:italic; font-weight:normal} - .atc-MetaItem { - font-size:0.6em; font-weight:normal; margin-bottom:0.75em; text-align:left; - list-style-type:none; text-transform:uppercase; display:inline-block} - .aut-Teaser_Avatar {font-size:0.6em; font-weight:bold; margin-bottom:0.75em; text-align:left} - .aut-Teaser_Name {font-size:0.6em; font-weight:bold; margin-bottom:0.75em; float:left; text-align:left} - .aut-Teaser_Description {font-size:0.6em; font-weight: normal; margin-bottom:0.75em; text-align:left; display:block} - .atc-Footer{font-size:0.6em; font-weight: normal; margin-bottom:0.75em; display:block} - ''' - - keep_only_tags = [dict(name='article', attrs={'class':'atc'}), - dict(name='div', attrs={'id':'FAZContent'}) - ] - - remove_tags_after = [dict(name='article', attrs={'class':'atc'})] - - remove_tags = [ - dict(name='div', attrs={'class':[ - 'atc-ContainerSocialMedia', - 'atc-ContainerFunctions_Interaction ', - 'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium', - 'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium ctn-PlaceholderContent-has-centered-content', - 'ctn-PlaceholderBox ctn-PlaceholderBox-is-in-article-text-right', - 'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-text-left ctn-PlaceholderContent-is-in-article-small', - 'aut-Follow aut-Follow-is-small-teaser', - 'aut-Follow aut-Follow-is-teaser', - 'js-ctn-PaywallTeasers ctn-PaywallTeasers', - 'ctn-PaywallInfo_TeaserImageContainer', - 'ctn-PaywallInfo_OfferContainer' - ]}), - dict(name='aside', attrs={'class':['atc-ContainerMore', - 'atc-ContainerMoreOneTeaser' - ]}), - dict(name='span', attrs={'class':['data-button', - 'o-VisuallyHidden' - ]}), - dict(name='a', attrs={'class':'btn-Base_Link'}) - ] - - feeds = [ - ('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'), - ('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'), - ('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'), - ('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'), - ('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'), - ('Lebensstil', 'http://www.faz.net/aktuell/lebensstil/?rssview=1'), - ('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'), - ('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'), - ('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'), - ('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'), - ('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'), - ('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1'), - ('Rhein-Main', 'http://www.faz.net/aktuell/rhein-main/?rssview=1') - ] - - # For multipages: - - INDEX = '' - - def append_page(self, soup, appendtag, position): - pager = soup.find('li',attrs={'class':'nvg-Paginator_Item nvg-Paginator_Item-to-next-page'}) - if pager: - nexturl = self.INDEX + pager.a['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('article', attrs={'class':'atc'}) - for cls in ( - 'atc-Header', - 'atc-ContainerMore', - 'atc-ContainerFunctions_Interaction', - 'aut-Follow aut-Follow-is-small-teaser', - 'aut-Follow aut-Follow-is-teaser' - ): - div = texttag.find(attrs={'class':cls}) - if div is not None: - div.extract() - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) - texttag.extract() - pager.extract() - appendtag.insert(position,texttag) - - # Find images - - def preprocess_html(self, soup): - self.append_page(soup, soup.body, 3) - for img in soup.findAll('img', attrs={'data-retina-src':True}): - img['src'] = img['data-retina-src'] - for img in soup.findAll('img', attrs={'data-src':True}): - img['src'] = img['data-src'] - return self.adeify_images(soup) - - # Some last cleanup - - def postprocess_html(self, soup, first_fetch): - for div in soup.findAll('div',attrs={'class':['atc-ContainerFunctions js-som-Abbinder', - 'ctn-PlaceholderContent ctn-PlaceholderContent-is-in-article-medium' - ]}): - div.extract() - return soup diff --git a/recipes/fc_knudde.recipe b/recipes/fc_knudde.recipe deleted file mode 100644 index 6febdbd972..0000000000 --- a/recipes/fc_knudde.recipe +++ /dev/null @@ -1,21 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1347706704(BasicNewsRecipe): - title = u'FC Knudde' - __author__ = u'DrMerry' - description = u'FC Knudde de populaire sport strip van Toon van Driel (http://www.toonvandriel.nl)' - language = u'nl' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = False - cover_url = 'http://a1.mzstatic.com/us/r1000/035/Purple/be/33/70/mzl.qkvshinq.320x480-75.jpg' - no_stylesheets = True - remove_javascript = True - remove_empty_feeds = True - remove_tags_before = dict(id='title') - remove_tags_after = dict(attrs={'class': 'entry-content rich-content'}) - use_embedded_content = True - extra_css = 'img{border:0;padding:0;margin:0;width:100%}' - - feeds = [(u'FC Knudde', u'http://www.nusport.nl/feeds/rss/fc-knudde.rss')] diff --git a/recipes/fdb_pl.recipe b/recipes/fdb_pl.recipe deleted file mode 100644 index 3dbd074f1a..0000000000 --- a/recipes/fdb_pl.recipe +++ /dev/null @@ -1,47 +0,0 @@ -__license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe - - -class FDBPl(BasicNewsRecipe): - title = u'Fdb.pl' - __author__ = 'fenuks' - description = u'Wiadomości ze świata filmu, baza danych filmowych, recenzje, zwiastuny, boxoffice.' - category = 'film' - language = 'pl' - extra_css = '.options-left > li {display: inline;} em {display: block;}' - cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg' - use_embedded_content = False - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - remove_javascript = True - remove_attributes = ['style', 'font'] - ignore_duplicate_articles = {'title', 'url'} - - keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})] - remove_tags = [ - dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']})] - feeds = [] - - def parse_index(self): - feeds = [] - feeds.append((u'Wiadomości', self.get_articles( - 'https://fdb.pl/wiadomosci?page={0}', 2))) - return feeds - - def get_articles(self, url, pages=1): - articles = [] - for nr in range(1, pages + 1): - soup = self.index_to_soup(url.format(nr)) - for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}): - node = tag.find('h5') - title = node.a.string - url = node.a['href'] - date = '' - articles.append({'title': title, - 'url': url, - 'date': date, - 'description': '' - }) - return articles diff --git a/recipes/fe_india.recipe b/recipes/fe_india.recipe deleted file mode 100644 index 1c835cb6e2..0000000000 --- a/recipes/fe_india.recipe +++ /dev/null @@ -1,75 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -financialexpress.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class FE_India(BasicNewsRecipe): - title = 'The Financial Express' - __author__ = 'Darko Miletic' - description = 'Financial news from India' - publisher = 'The Indian Express Limited' - category = 'news, politics, finances, India' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'en_IN' - remove_empty_feeds = True - ignore_duplicate_articles = {'url'} - publication_type = 'magazine' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [classes('wp-block-post-title wp-block-post-excerpt ie-network-post-meta-wrapper wp-block-post-featured-image wp-block-post-content')] - remove_tags = [classes('parent_also_read')] - remove_attributes = ['width', 'height'] - - feeds = [ - # https://www.financialexpress.com/syndication/ - # Print feeds - ('Front Page','https://www.financialexpress.com/print/front-page/feed/'), - ('Corporate Markets','https://www.financialexpress.com/print/corporate-markets/feed/'), - ('Economy','https://www.financialexpress.com/print/economy-print/feed/'), - ('Opinion','https://www.financialexpress.com/print/edits-columns/feed/'), - ('personal Finance','https://www.financialexpress.com/print/personal-finance-print/feed/'), - # ('Brandwagon', 'https://www.financialexpress.com/print/brandwagon/feed/'), - # Other Feeds - ('Economy', 'https://www.financialexpress.com/economy/feed/'), - ('Banking & finance', 'https://www.financialexpress.com/industry/banking-finance/feed/'), - ('Opinion', 'https://www.financialexpress.com/opinion/feed/'), - ('Editorial', 'https://www.financialexpress.com/editorial/feed/'), - ('Budget', 'https://www.financialexpress.com/budget/feed/'), - ('Industry', 'https://www.financialexpress.com/industry/feed/'), - ('Market', 'https://www.financialexpress.com/market/feed/'), - ('Jobs', 'https://www.financialexpress.com/jobs/feed/'), - ('SME', 'https://www.financialexpress.com/industry/sme/feed/'), - ('Mutual Funds', 'https://www.financialexpress.com/money/mutual-funds/feed/'), - ('Health','https://www.financialexpress.com/lifestyle/health/feed'), - # ('Health Care','https://www.financialexpress.com/healthcare/feed'), - ('Science','https://www.financialexpress.com/lifestyle/science/feed'), - ('Infrastructure','https://www.financialexpress.com/infrastructure/feed'), - ('Money','https://www.financialexpress.com/money/feed'), - ] - - def get_cover_url(self): - soup = self.index_to_soup('https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/') - for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): - return citem['content'] - - def preprocess_html(self, soup, *a): - for img in soup.findAll(attrs={'data-src': True}): - img['src'] = img['data-src'] - return soup diff --git a/recipes/felicia.recipe b/recipes/felicia.recipe deleted file mode 100644 index dc9cba5ebf..0000000000 --- a/recipes/felicia.recipe +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -revistafelicia.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Felicia(BasicNewsRecipe): - title = u'Revista Felicia' - __author__ = u'Silviu Cotoar\u0103' - description = u'O revist\u0103 pentru sufletul t\u0103u' - publisher = u'Revista Felicia' - oldest_article = 25 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste' - encoding = 'utf-8' - cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'header'}), dict( - name='div', attrs={'id': 'contentArticol'}) - ] - - remove_tags = [ - dict(name='img', attrs={'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}), dict( - name='div', attrs={'class': ['content']}) - ] - - feeds = [ - (u'Feeds', u'http://www.revistafelicia.ro/rss') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/fhm_uk.recipe b/recipes/fhm_uk.recipe deleted file mode 100644 index 7c2ef76b4e..0000000000 --- a/recipes/fhm_uk.recipe +++ /dev/null @@ -1,45 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1325006965(BasicNewsRecipe): - title = u'FHM UK' - description = 'Good News for Men.' - cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg' - # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg' - masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif' - __author__ = 'Dave Asbury' - # last updated 7/10/12 - language = 'en_GB' - oldest_article = 31 - max_articles_per_feed = 15 - remove_empty_feeds = True - no_stylesheets = True - - keep_only_tags = [ - dict(name='h1'), - dict(name='img', attrs={'id': 'ctl00_Body_imgMainImage'}), - dict(name='div', attrs={ - 'id': ['profileLeft', 'articleLeft', 'profileRight', 'profileBody']}), - dict(name='div', attrs={ - 'class': ['imagesCenterArticle', 'containerCenterArticle', 'articleBody', ]}), - - ] - - remove_tags = [ - dict(attrs={'id': ['ctl00_Body_divSlideShow']}), - - ] - feeds = [ - # repeatable search = {|}{%}{|}

{*}

- (u'Homepage', u'http://rss.feedsportal.com/c/375/f/434908/index.rss'), - (u'Funny', u'http://rss.feedsportal.com/c/375/f/434910/index.rss'), - (u'Girls', u'http://rss.feedsportal.com/c/375/f/434913/index.rss'), - ] - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' diff --git a/recipes/fhmro.recipe b/recipes/fhmro.recipe deleted file mode 100644 index d4cafe4a5a..0000000000 --- a/recipes/fhmro.recipe +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -fhm.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class FHMro(BasicNewsRecipe): - title = u'FHM Ro' - __author__ = u'Silviu Cotoar\u0103' - description = u'Pentru c\u0103 noi putem' - publisher = 'FHM' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Reviste' - encoding = 'utf-8' - cover_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'contentMainTitle'}), dict( - name='div', attrs={'class': 'entry'}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': ['ratingblock ']}), dict( - name='a', attrs={'rel': ['tag']}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['ratingblock ']}), dict( - name='div', attrs={'class': ['socialize-containter']}) - ] - - feeds = [ - (u'Feeds', u'http://www.fhm.ro/feed') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/fifty_two.recipe b/recipes/fifty_two.recipe deleted file mode 100644 index f1d8e9ec02..0000000000 --- a/recipes/fifty_two.recipe +++ /dev/null @@ -1,30 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe, classes - - -class fiftytwo(BasicNewsRecipe): - title = u'Fifty Two' - description = ('Every week, 52 publishes an essay that dives deep into an aspect of India’s history,' - ' politics and culture. Each story will explain, recall or establish something interesting ' - 'about life on our subcontinent, and tell readers why it matters to them.') - language = 'en_IN' - __author__ = 'unkn0wn' - oldest_article = 30 # days - max_articles_per_feed = 50 - encoding = 'utf-8' - use_embedded_content = False - no_stylesheets = True - remove_attributes = ['style', 'height', 'width'] - masthead_url = 'https://fiftytwo.in//img/52-logo.png' - # https://fiftytwo.in/img/favicon.png - ignore_duplicate_articles = {'url'} - extra_css = '.story-info, .story-notes, .story-intro {font-size:small; font-style:italic;}' - - keep_only_tags = [ - classes( - 'story-banner__container story-info story-slices story-notes' - ), - ] - - feeds = [ - ('Articles', 'https://fiftytwo.in/feed.xml'), - ] diff --git a/recipes/fisco_oggi.recipe b/recipes/fisco_oggi.recipe deleted file mode 100644 index 128241af77..0000000000 --- a/recipes/fisco_oggi.recipe +++ /dev/null @@ -1,27 +0,0 @@ -__license__ = 'GPL v3' -__author__ = 'faber1971' -description = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1324112023(BasicNewsRecipe): - title = u'Fisco Oggi' - language = 'it' - __author__ = 'faber1971' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - remove_javascript = True - no_stylesheets = True - - feeds = [ - (u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), - (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), - (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), - (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), - - (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), - (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), - (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), - (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')] diff --git a/recipes/fleshbot.recipe b/recipes/fleshbot.recipe deleted file mode 100644 index 28981ec9af..0000000000 --- a/recipes/fleshbot.recipe +++ /dev/null @@ -1,39 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, NA' -''' -fleshbot.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Fleshbot(BasicNewsRecipe): - title = 'Fleshbot' - __author__ = 'NA' - description = "Fleshbot, Pure Filth." - publisher = 'Fleshbot.com' - category = 'news, sex, sex industry, celebs, nudes, adult, adult toys, sex toys' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = True - language = 'en' - masthead_url = 'http://fbassets.s3.amazonaws.com/images/uploads/2012/01/fleshbot-logo.png' - extra_css = ''' - body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} - img{margin-bottom: 1em} - h1{font-family :Arial,Helvetica,sans-serif; font-size:large} - ''' - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [(u'Articles', u'http://fleshbot.com/?feed=rss2')] - - remove_tags = [ - {'class': 'feedflare'}, - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/folkebladet_dk.recipe b/recipes/folkebladet_dk.recipe deleted file mode 100644 index 1f8e24a3e8..0000000000 --- a/recipes/folkebladet_dk.recipe +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Folkebladet -''' - - -class Folkebladet_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Folkebladet' - description = 'Dine lokale nyheder på nettet' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Folkebladet', 'http://folkebladet.dk/feed/'), - ('Kommentarer', 'http://folkebladet.dk/comments/feed/'), - ] - diff --git a/recipes/folkebladetdjursland_dk.recipe b/recipes/folkebladetdjursland_dk.recipe deleted file mode 100644 index fdca400675..0000000000 --- a/recipes/folkebladetdjursland_dk.recipe +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Folkebladet Djursland -''' - - -class FolkebladetDjursland_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Folkebladet Djursland' - description = 'Lokale og regionale nyheder' - category = 'newspaper, news, localnews, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Nyheder', 'http://dinby.dk/folkebladet-djursland/rss'), - ] - diff --git a/recipes/folketidende_dk.recipe b/recipes/folketidende_dk.recipe deleted file mode 100644 index 13b6041296..0000000000 --- a/recipes/folketidende_dk.recipe +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -folketidende.dk -''' - - -class Folketidende_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'folketidende.dk' - description = 'Lokalt nyhedssite, med nyheder og lokalstof om Lolland og Falster' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('folketidende.dk - Lolland & Falster samlet på et sted', 'http://folketidende.dk/rss-nyhedsbrev.xml'), - - ] - diff --git a/recipes/forbes.recipe b/recipes/forbes.recipe deleted file mode 100644 index 880af999dd..0000000000 --- a/recipes/forbes.recipe +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class Forbes(BasicNewsRecipe): - title = u'Forbes' - description = 'Business and Financial News' - __author__ = 'Kovid Goyal' - oldest_article = 30 - max_articles_per_feed = 20 - language = 'en' - encoding = 'utf-8' - no_stylesheets = True - ignore_duplicate_articles = {'title', 'url'} - remove_empty_feeds = True - - extra_css = ''' - div.fb-captioned-img { - font-size: smaller; - margin-top: 1em; margin-bottom: 1em; - } - div.fb-captioned-img img { - display:block; - margin-left: auto; margin-right: auto; - } - ''' - feeds = [ - (u'Latest', u'https://www.forbes.com/news/index.xml'), - (u'Most Popular', u'https://www.forbes.com/feeds/popstories.xml'), - (u'Technology', u'https://www.forbes.com/technology/index.xml'), - (u'Business', u'https://www.forbes.com/business/index.xml'), - (u'Sports Money', u'https://www.forbes.com/sportsmoney/index.xml'), - (u'Leadership', u'https://www.forbes.com/leadership/index.xml'), - ] - - keep_only_tags = [ - classes('article-headline-container hero-image-block article-body bottom-contrib-block') - ] - - remove_tags = [ - classes('article-sharing'), - dict(name='button'), - ] - - def preprocess_html(self, soup): - h = soup.find(**classes('hero-image-block')) - if h is not None: - h1 = soup.find(**classes('article-headline-container')) - h.extract() - h1.append(h) - return soup - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.set_cookie('dailyWelcomeCookie', 'true', '.forbes.com') - br.set_cookie('welcomeAd', 'true', '.forbes.com') - return br - - # def parse_index(self): - # return [('Articles', [{'title':'Test', 'url': - # 'http://www.forbes.com/sites/hamdiraini/2016/04/25/bazin-seeks-startups-to-accelerate-accorhotels-transformation/'}])] diff --git a/recipes/forbes_india.recipe b/recipes/forbes_india.recipe deleted file mode 100644 index 8b9f248a22..0000000000 --- a/recipes/forbes_india.recipe +++ /dev/null @@ -1,55 +0,0 @@ -from calibre.ptempfile import PersistentTemporaryFile -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1276934715(BasicNewsRecipe): - title = u'Forbes India' - __author__ = 'rty' - description = 'India Edition Forbes' - publisher = 'Forbes India' - category = 'Business News, Economy, India' - oldest_article = 7 - max_articles_per_feed = 100 - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - language = 'en_IN' - temp_files = [] - articles_are_obfuscated = True - conversion_options = {'linearize_tables': True} - feeds = [ - (u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'), - ] - extra_css = ''' - .t-10-gy-l{font-style: italic; font-size: small} - .t-30-b-d{font-weight: bold; font-size: xx-large} - .t-16-gy-l{font-weight: bold; font-size: x-large; font-syle: italic} - .storycontent{font-size: 4px;font-family: Times New Roman;} - ''' - - remove_tags_before = dict(name='div', attrs={'class': 'pdl10 pdr15'}) - - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url) - response = br.follow_link(url_regex=r'/printcontent/[0-9]+', nr=0) - html = response.read() - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name - - def get_cover_url(self): - index = 'http://business.in.com/magazine/' - soup = self.index_to_soup(index) - for image in soup.findAll('a', {"class": "lbOn a-9-b-d"}): - return image['href'] - # return image['href'] + '.jpg' - return None - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll(width=True): - del item['width'] - return soup diff --git a/recipes/forbes_pl.recipe b/recipes/forbes_pl.recipe deleted file mode 100644 index 5a7dbad559..0000000000 --- a/recipes/forbes_pl.recipe +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' - -import datetime -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class forbes_pl(BasicNewsRecipe): - title = u'Forbes.pl' - __author__ = 'Artur Stachecki ' - language = 'pl' - description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.' - oldest_article = 1 - index = 'http://www.forbes.pl' - cover_url = 'http://www.forbes.pl/resources/front/images/logo.png' - max_articles_per_feed = 100 - extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}' - preprocess_regexps = [(re.compile(u'

()?(Czytaj|Zobacz) (też|także):.*?

', re.DOTALL), - lambda match: ''), (re.compile(u'Zobacz:.*?', re.DOTALL), lambda match: '')] - remove_javascript = True - no_stylesheets = True - now = datetime.datetime.now() - yesterday = now - datetime.timedelta(hours=24) - yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S") - pages_count = 4 - keep_only_tags = [dict(attrs={'class': [ - 'Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})] - remove_tags = [dict(attrs={'class': [ - 'Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})] - - feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')] - - '''def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup - - - def append_page(self, soup, appendtag): - cleanup = False - nexturl = appendtag.find('a', attrs={'class':'next'}) - if nexturl: - cleanup = True - while nexturl: - soup2 = self.index_to_soup(self.index + nexturl['href']) - nexturl = soup2.find('a', attrs={'class':'next'}) - pagetext = soup2.findAll(id='article-body-wrapper') - if not pagetext: - pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'}) - for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)): - comment.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - if cleanup: - for r in appendtag.findAll(attrs={'class':'paginator'}): - r.extract()''' diff --git a/recipes/formulaas.recipe b/recipes/formulaas.recipe deleted file mode 100644 index 802eefeab9..0000000000 --- a/recipes/formulaas.recipe +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -formula-as.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class FormulaAS(BasicNewsRecipe): - title = u'Formula AS' - __author__ = u'Silviu Cotoar\u0103' - publisher = u'Formula AS' - description = u'Formula AS' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Romania' - encoding = 'utf-8' - cover_url = 'http://www.formula-as.ro/_client/img/header_logo.png' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'item padded'}) - ] - - remove_tags = [ - dict(name='ul', attrs={'class': 'subtitle lower'}) - ] - - remove_tags_after = [ - dict(name='ul', attrs={'class': 'subtitle lower'}), - dict(name='div', attrs={'class': 'item-brief-options'}) - ] - feeds = [ - (u'\u0218tiri', u'http://www.formula-as.ro/rss/articole.xml') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/forsal.recipe b/recipes/forsal.recipe deleted file mode 100644 index 9ed97b0f76..0000000000 --- a/recipes/forsal.recipe +++ /dev/null @@ -1,51 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ForsalPL(BasicNewsRecipe): - title = u'Forsal.pl' - __author__ = 'fenuks' - description = u'Na portalu finansowym Forsal.pl znajdziesz najświeższe wiadomości finansowe i analizy. Kliknij i poznaj aktualne kursy walut, notowania giełdowe oraz inne wiadomości ze świata finansów.' # noqa - category = 'economy, finance' - language = 'pl' - oldest_article = 7 - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - ignore_duplicate_articles = {'title', 'url'} - cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg' - no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class': 'related'}), dict( - name='img', attrs={'title': 'Forsal'})] - feeds = [ - (u'Najnowsze', u'http://forsal.pl/atom/najnowsze'), - (u'Tylko na forsal.pl', u'http://forsal.pl/atom/tagi/forsal'), - (u'Publicystyka', u'http://forsal.pl/atom/tagi/opinia'), - (u'Bloomberg', u'http://forsal.pl/atom/tagi/bloomberg'), - (u'Financial Times', u'http://forsal.pl/atom/tagi/financial_times'), - (u'Gie\u0142da', u'http://forsal.pl/atom/tagi/gielda'), - (u'Waluty', u'http://forsal.pl/atom/tagi/waluty'), - (u'Surowce', u'http://forsal.pl/atom/tagi/surowce'), - (u'Komenarze finasnowe', u'http://forsal.pl/atom/tagi/komentarz'), - (u'Komentarze gie\u0142dowe', u'http://forsal.pl/atom/tagi/komentarz;gielda'), - (u'Komentarze walutowe', u'http://forsal.pl/atom/tagi/komentarz;waluty'), - - (u'Makroekonomia', u'http://forsal.pl/atom/tagi/makroekonomia'), - (u'Handel', u'http://forsal.pl/atom/tagi/handel'), - (u'Nieruchomo\u015bci', u'http://forsal.pl/atom/tagi/nieruchomosci'), - (u'Motoryzacja', u'http://forsal.pl/atom/tagi/motoryzacja'), - (u'Finanse', u'http://forsal.pl/atom/tagi/finanse'), - (u'Transport', u'http://forsal.pl/atom/tagi/transport'), - (u'Media', u'http://forsal.pl/atom/tagi/media'), - (u'Telekomunikacja', u'http://forsal.pl/atom/tagi/telekomunikacja'), - (u'Energetyka', u'http://forsal.pl/atom/tagi/energetyka'), - (u'Przemys\u0142', u'http://forsal.pl/atom/tagi/przemysl'), - (u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma')] - - def print_version(self, url): - url_id = re.search(u'/[0-9]+,', url) - if url_id: - return 'http://forsal.pl/drukowanie' + url_id.group(0)[:-1] - else: - return url diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe deleted file mode 100644 index f72454aa33..0000000000 --- a/recipes/fotoblogia_pl.recipe +++ /dev/null @@ -1,18 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Fotoblogia_pl(BasicNewsRecipe): - title = u'Fotoblogia.pl' - __author__ = 'fenuks' - description = u'Jeden z największych polskich blogów o fotografii.' - category = 'photography' - language = 'pl' - masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg' - cover_url = 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - keep_only_tags = [dict(name='article')] - remove_tags = [dict(attrs={'class': 'article-related'})] - feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')] diff --git a/recipes/fr_online.recipe b/recipes/fr_online.recipe deleted file mode 100644 index 85e415965e..0000000000 --- a/recipes/fr_online.recipe +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2010, Christian Schmitt' - -''' -fr-online.de -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class FROnlineRecipe(BasicNewsRecipe): - title = 'Frankfurter Rundschau' - __author__ = 'maccs' - description = 'Nachrichten aus D und aller Welt' - encoding = 'utf-8' - masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png' - publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH' - category = 'news, germany, world' - language = 'de' - publication_type = 'newspaper' - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - oldest_article = 1 # Increase this number if you're interested in older articles - max_articles_per_feed = 50 # Seems a reasonable number to me - extra_css = ''' - body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;} - .imgSubline{background-color: #f4f4f4; font-size: 0.8em;} - .p--heading-1 {font-weight: bold;} - .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;} - ''' - remove_tags = [dict(name='div', attrs={'id': 'Logo'})] - cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png' - cover_margins = (100, 150, '#ffffff') - - feeds = [] - feeds.append( - ('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml')) - feeds.append( - ('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml')) - feeds.append( - ('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml')) - feeds.append( - ('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml')) - feeds.append( - ('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml')) - feeds.append(('Eintracht Frankfurt', - u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml')) - feeds.append(('Kultur und Medien', - u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml')) - feeds.append( - ('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml')) - feeds.append( - ('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml')) - feeds.append( - ('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml')) - feeds.append( - ('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml')) - feeds.append( - ('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml')) - feeds.append( - ('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml')) - feeds.append( - ('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml')) - feeds.append( - ('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml')) - feeds.append( - ('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml')) - feeds.append( - ('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml')) - - def print_version(self, url): - return url.replace('index.html', 'view/printVersion/-/index.html') diff --git a/recipes/frankfurter_rundschau.recipe b/recipes/frankfurter_rundschau.recipe deleted file mode 100644 index 1eb8c348f1..0000000000 --- a/recipes/frankfurter_rundschau.recipe +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python - -''' -fr-online.de -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class FR(BasicNewsRecipe): - title = 'Frankfurter Rundschau' - __author__ = 'Kovid Goyal' - description = 'Nachrichten aus D und aller Welt' - language = 'de' - publication_type = 'newspaper' - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - oldest_article = 1 # Increase this number if you're interested in older articles - max_articles_per_feed = 50 # Seems a reasonable number to me - encoding = 'cp1252' - - keep_only_tags = [ - dict(id='fcms_page_main'), - ] - remove_tags = [ - dict(name='footer'), - dict(id='comments'), - ] - - feeds = [ - ('Startseite', u'http://www.fr.de/?_XML=rss'), - ('Frankfurt', u'https://www.fr.de/frankfurt/?_XML=rss'), - ('Rhein-Main', 'https://www.fr.de/rhein-main/?_XML=rss'), - ('Politik', 'https://www.fr.de/politik/?_XML=rss'), - ('Wirtschaft', 'https://www.fr.de/wirtschaft/?_XML=rss'), - ('Sport', 'https://www.fr.de/sport/?_XML=rss'), - ('Eintracht Frankfurt', 'https://www.fr.de/sport/eintracht/?_XML=rss'), - ('Kultur', 'https://www.fr.de/kultur/?_XML=rss'), - ('Wissen', 'https://www.fr.de/wissen/?_XML=rss'), - ('Leben', 'https://www.fr.de/leben/?_XML=rss'), - ('Panorama', 'https://www.fr.de/panorama/?_XML=rss'), - ] - - def preprocess_html(self, soup): - for img in soup.findAll('img', attrs={'data-src': True}): - img['src'] = img['data-src'] - main = soup.find(id='fcms_page_main') - for i, tag in tuple(enumerate(main)): - if getattr(tag, 'name', None): - main.replaceWith(tag) - break - return soup diff --git a/recipes/freakonomics.recipe b/recipes/freakonomics.recipe deleted file mode 100644 index 8b0ff9e840..0000000000 --- a/recipes/freakonomics.recipe +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__copyright__ = '2011, Starson17' -__docformat__ = 'restructuredtext en' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Freakonomics(BasicNewsRecipe): - title = 'Freakonomics Blog' - description = 'The Hidden side of everything' - __author__ = 'Starson17' - __version__ = '1.02' - __date__ = '11 July 2011' - language = 'en' - cover_url = 'http://ilkerugur.files.wordpress.com/2009/04/freakonomics.jpg' - use_embedded_content = False - no_stylesheets = True - oldest_article = 30 - remove_javascript = True - remove_empty_feeds = True - max_articles_per_feed = 50 - - feeds = [(u'Freakonomics Blog', u'http://www.freakonomics.com/feed/')] - keep_only_tags = [dict(name='div', attrs={'id': ['content']})] - remove_tags_after = [ - dict(name='div', attrs={'class': ['simple_socialmedia']})] - remove_tags = [dict(name='div', attrs={ - 'class': ['simple_socialmedia', 'single-fb-share', 'wp-polls']})] - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' diff --git a/recipes/fredensborglokalavisen_dk.recipe b/recipes/fredensborglokalavisen_dk.recipe deleted file mode 100644 index 6d636a08fe..0000000000 --- a/recipes/fredensborglokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Uge-Nyt -''' - - -class FredensborgLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Uge-Nyt' - description = 'Uge-Nyt: Lokale og regionale nyheder, sport og kultur fra Fredensborg, Kokkedal og Humlebæk på fredensborg.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Uge-Nyt', 'http://fredensborg.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/fredericialokalavisen_dk.recipe b/recipes/fredericialokalavisen_dk.recipe deleted file mode 100644 index 3fdf24d4c8..0000000000 --- a/recipes/fredericialokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Fredericia -''' - - -class FredericiaLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Fredericia' - description = 'Lokale og regionale nyheder, sport, kultur fra Fredericia og omegn på fredericia.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Fredericia', 'http://fredericia.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/frederiksbergbladet_dk.recipe b/recipes/frederiksbergbladet_dk.recipe deleted file mode 100644 index d7d3f42b0c..0000000000 --- a/recipes/frederiksbergbladet_dk.recipe +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Frederiksberg Bladet -''' - - -class FrederiksbergBladet_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Frederiksberg Bladet' - - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Frederiksberg Bladet', 'http://minby.dk/frederiksberg-bladet/feed/'), - ('Kommentarer til Frederiksberg Bladet', 'http://minby.dk/frederiksberg-bladet/comments/feed/'), - - ] - diff --git a/recipes/frederikssundlokalavisen_dk.recipe b/recipes/frederikssundlokalavisen_dk.recipe deleted file mode 100644 index 80086f5ce2..0000000000 --- a/recipes/frederikssundlokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Frederikssund -''' - - -class FrederikssundLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Frederikssund' - description = 'Lokale, regionale nyheder, sport og kultur fra Frederikssund, Jægerspris og omegn på frederikssund.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Frederikssund', 'http://frederikssund.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/freeway.recipe b/recipes/freeway.recipe deleted file mode 100644 index 76fc96a05c..0000000000 --- a/recipes/freeway.recipe +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = '2010, Gustavo Azambuja ' -''' -http://freeway.com.uy -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class General(BasicNewsRecipe): - title = 'freeway.com.uy' - __author__ = 'Gustavo Azambuja' - description = 'Revista Freeway, Montevideo, Uruguay' - language = 'es_UY' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 1 - encoding = 'utf8' - remove_javascript = True - no_stylesheets = True - conversion_options = {'linearize_tables': True} - - oldest_article = 180 - max_articles_per_feed = 100 - keep_only_tags = [ - dict(id=['contenido']), - dict(name='a', attrs={'class': 'titulo_art_ppal'}), - dict(name='img', attrs={'class': 'recuadro'}), - dict(name='td', attrs={'class': 'txt_art_ppal'}) - ] - remove_tags = [ - dict(name=['object', 'link']) - ] - remove_attributes = ['width', 'height', 'style', 'font', 'color'] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - img {float:left; clear:both; margin:10px} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - - def parse_index(self): - feeds = [] - for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]: - articles = self.art_parse_section(url) - if articles: - feeds.append((title, articles)) - return feeds - - def art_parse_section(self, url): - soup = self.index_to_soup(url) - div = soup.find(attrs={'id': 'tbl_1'}) - - current_articles = [] - for tag in div.findAllNext(attrs={'class': 'ancho_articulos'}): - if tag.get('class') == 'link-list-heading': - break - for td in tag.findAll('td'): - a = td.find('a', attrs={'class': 'titulo_articulos'}) - if a is None: - continue - title = self.tag_to_string(a) - url = a.get('href', False) - if not url or not title: - continue - if url.startswith('/'): - url = 'http://freeway.com.uy' + url - p = td.find('p', attrs={'class': 'txt_articulos'}) - description = self.tag_to_string(p) - self.log('\t\tFound article:', title) - self.log('\t\t\t', url) - self.log('\t\t\t', description) - current_articles.append( - {'title': title, 'url': url, 'description': description, 'date': ''}) - - return current_articles - - def preprocess_html(self, soup): - attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' ] # noqa - for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): - item.name = 'div' - for attrib in attribs: - item[attrib] = '' - del item[attrib] - return soup - - def get_cover_url(self): - return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg' diff --git a/recipes/fstream.recipe b/recipes/fstream.recipe deleted file mode 100644 index 84745c7169..0000000000 --- a/recipes/fstream.recipe +++ /dev/null @@ -1,75 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class FIELDSTREAM(BasicNewsRecipe): - title = 'Field and Stream' - __author__ = 'Starson17 and Tonythebookworm' - description = 'Hunting and Fishing and Gun Talk' - language = 'en' - no_stylesheets = True - publisher = 'Starson17 and Tonythebookworm' - category = 'food recipes, hunting, fishing, guns' - use_embedded_content = False - no_stylesheets = True - oldest_article = 24 - remove_javascript = True - remove_empty_feeds = True - cover_url = 'http://www.arrowheadflyangler.com/Portals/1/Articles/FieldStream/Field%20and%20Stream%20March%20Fishing%20Edition%20Article%20Cover.jpg' # noqa - max_articles_per_feed = 10 - INDEX = 'http://www.fieldandstream.com' - - keep_only_tags = [ - dict(name='div', attrs={'class': ['article-wrapper']}), - ] - remove_tags = [ - dict(name='div', attrs={ - 'class': lambda x: x and 'content-main-bottom' in x.split()}), - dict(name='div', attrs={ - 'class': lambda x: x and 'pw-widget' in x.split()}), - ] - - def preprocess_html(self, soup): - for img in soup.findAll('img', attrs={'data-src': True}): - img['src'] = img['data-src'] - for form in soup.findAll('form'): - form.parent.extract() - return soup - - def parse_index(self): - feeds = [] - num = self.test[0] if self.test else 100 - for title, url in [ - ('Field Test', 'http://www.fieldandstream.com/blogs/field-test'), - (u"Wild Chef", u"http://www.fieldandstream.com/blogs/wild-chef"), - (u"The Gun Nuts", u"http://www.fieldandstream.com/blogs/gun-nut"), - (u"Whitetail 365", u"http://www.fieldandstream.com/blogs/whitetail-365"), - ('Field Notes', 'http://www.fieldandstream.com/blogs/field-notes'), - (u"Fly Talk", u"http://www.fieldandstream.com/blogs/flytalk"), - (u"The Conservationist", - u"http://www.fieldandstream.com/blogs/conservationist"), - ('The Lateral Line', 'http://www.fieldandstream.com/blogs/lateral-line'), - ('Total Outdoorsman', - 'http://www.fieldandstream.com/blogs/total-outdoorsman'), - ('A Sportsman\'s Life', - 'http://www.fieldandstream.com/blogs/a-sportsmans-life'), - ]: - self.log('Section:', title) - articles = self.make_links(url) - if articles: - feeds.append((title, articles)) - if len(feeds) > num: - break - return feeds - - def make_links(self, url): - current_articles = [] - soup = self.index_to_soup(url) - for item in soup.findAll('h2'): - link = item.find('a') - if link: - url = self.INDEX + link['href'] - title = self.tag_to_string(link) - self.log('\t', title, 'at', url) - current_articles.append( - {'title': title, 'url': url, 'description': '', 'date': ''}) - return current_articles diff --git a/recipes/furesoelokalavisen_dk.recipe b/recipes/furesoelokalavisen_dk.recipe deleted file mode 100644 index bede411a8a..0000000000 --- a/recipes/furesoelokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Furesø Avis -''' - - -class FuresoeLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Furesø Avis' - description = 'Lokale og regionale nyheder, sport og kultur fra Farum, Værløse og Furesø på furesoe.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Furesø Avis', 'http://furesoe.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/gamasutra_fa.recipe b/recipes/gamasutra_fa.recipe deleted file mode 100644 index ec0aba3948..0000000000 --- a/recipes/gamasutra_fa.recipe +++ /dev/null @@ -1,39 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -gamasutra.com -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Gamasutra(BasicNewsRecipe): - title = 'Gamasutra Featured articles' - __author__ = 'Darko Miletic' - description = 'The Art and Business of Making Games' - publisher = 'Gamasutra' - category = 'news, games, IT' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - remove_tags_before = dict(name="div", attrs={'class': 'page_item'}) - remove_tags = [ - dict(name='meta'), dict(name='link'), dict(name='hr'), dict(name='div', attrs={'class': 'hide-phone'}), dict(name='div', attrs={'class': 'nav_links'}), - dict(name='div', attrs={'class': 'superfooter'}), dict(name='span', attrs={'class': 'comment_text'}), dict(name='a', attrs={'type': 'button'}) - ] - remove_attributes = ['width', 'height', 'name'] - - feeds = [ - (u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')] - - def print_version(self, url): - return url.partition('?')[0] + '?print=1' diff --git a/recipes/gamasutra_news.recipe b/recipes/gamasutra_news.recipe deleted file mode 100644 index 6a459b6dc5..0000000000 --- a/recipes/gamasutra_news.recipe +++ /dev/null @@ -1,40 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -gamasutra.com -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Gamasutra(BasicNewsRecipe): - title = 'Gamasutra News' - __author__ = 'Darko Miletic' - description = 'The Art and Business of Making Games' - publisher = 'Gamasutra' - category = 'news, games, IT' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - remove_tags_before = dict(name="div", attrs={'class': 'page_item'}) - remove_tags = [ - dict(name='meta'), dict(name='link'), - dict(name='hr'), dict(name='div', attrs={'class': 'hide-phone'}), - dict(name='div', attrs={'class': 'nav_links'}), dict(name='div', attrs={'class': 'superfooter'}), - dict(name='span', attrs={'class': 'comment_text'}), dict(name='a', attrs={'type': 'button'}) - ] - remove_attributes = ['width', 'height', 'name'] - - feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')] - - def print_version(self, url): - return url.partition('?')[0] + '?print=1' diff --git a/recipes/gamespot.recipe b/recipes/gamespot.recipe deleted file mode 100644 index dd982b4e5d..0000000000 --- a/recipes/gamespot.recipe +++ /dev/null @@ -1,47 +0,0 @@ -__license__ = 'GPL v3' -__author__ = u'Marc Toensing' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class GamespotCom(BasicNewsRecipe): - - title = u'Gamespot.com Reviews' - description = 'review articles from gamespot.com' - language = 'en' - __author__ = u'Marc T\xf6nsing' - - oldest_article = 7 - max_articles_per_feed = 40 - remove_empty_feeds = True - no_stylesheets = True - no_javascript = True - - feeds = [ - ('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'), - ('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'), - ('XBOX 360 Reviews', - 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'), - ('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'), - ('PlayStation 3 Reviews', - 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1028'), - ('PlayStation 2 Reviews', - 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=7'), - ('PlayStation Portable Reviews', - 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1024'), - ('Nintendo DS Reviews', - 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1026'), - ('iPhone Reviews', - 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1049'), - ] - - remove_tags = [ - dict(name='div', attrs={'class': 'top_bar'}), - dict(name='div', attrs={'class': 'video_embed'}) - ] - - def get_cover_url(self): - return 'http://image.gamespotcdn.net/gamespot/shared/gs5/gslogo_bw.gif' - - def get_article_url(self, article): - return article.get('link') + '?print=1' diff --git a/recipes/gandul.recipe b/recipes/gandul.recipe deleted file mode 100644 index 5a29c28c20..0000000000 --- a/recipes/gandul.recipe +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -gandul.info -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Gandul(BasicNewsRecipe): - title = u'G\u00E2ndul' - __author__ = u'Silviu Cotoar\u0103' - publisher = 'Gandul' - description = 'Cotidian Online' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri,Romania' - encoding = 'utf-8' - cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'article'}) - ] - - remove_tags = [ - dict(name='a', attrs={'class': 'photo'}), dict( - name='div', attrs={'class': 'ad'}) - ] - - feeds = [ - (u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/gazeta_lubuska.recipe b/recipes/gazeta_lubuska.recipe deleted file mode 100644 index 4e749e2989..0000000000 --- a/recipes/gazeta_lubuska.recipe +++ /dev/null @@ -1,69 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class GazetaLubuska(BasicNewsRecipe): - title = u'Gazeta Lubuska' - __author__ = 'fenuks' - description = u'Gazeta Lubuska - portal regionalny województwa lubuskiego.' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - extra_css = 'ul {list-style: none; padding:0; margin:0;}' - INDEX = 'http://www.gazetalubuska.pl' - masthead_url = INDEX + '/images/top_logo.png' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - - feeds = [ - (u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'), - (u'Dreznenko', u'http://www.gazetalubuska.pl/drezdenko.xml'), - (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'), - (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'), - (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'), - (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'), - (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'), - (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'), - (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'), - (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'), - (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'), - (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'), - (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'), - (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'), - (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'), - (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'), - (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'), - (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'), - (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'), - (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'), - (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'), - (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'), - (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'), - (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'), - (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'), - (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml')] - - keep_only_tags = [dict(id='article')] - - def get_cover_url(self): - soup = self.index_to_soup( - self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') - nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] - soup = self.index_to_soup(nexturl) - self.cover_url = self.INDEX + \ - soup.find(id='cover').find(name='img')['src'] - return getattr(self, 'cover_url', self.cover_url) - - def decode_feedportal_url(self, url): - link = url.rpartition('l/0L0S')[2][:-12] - replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), - ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) - for t in replaces: - link = link.replace(*t) - return 'http://' + link - - def print_version(self, url): - return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/gazeta_pl_bydgoszcz.recipe b/recipes/gazeta_pl_bydgoszcz.recipe deleted file mode 100644 index dbc6fc0b78..0000000000 --- a/recipes/gazeta_pl_bydgoszcz.recipe +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' - -import re - -from calibre.ebooks.BeautifulSoup import Comment -from calibre.web.feeds.news import BasicNewsRecipe - - -class gw_bydgoszcz(BasicNewsRecipe): - title = u'Gazeta Wyborcza Bydgoszcz' - __author__ = 'fenuks' - language = 'pl' - description = 'Wiadomości z Bydgoszczy na portalu Gazeta.pl.' - category = 'newspaper' - publication_type = 'newspaper' - masthead_url = 'http://bi.gazeta.pl/im/3/4089/m4089863.gif' - INDEX = 'http://bydgoszcz.gazeta.pl' - cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif' - remove_empty_feeds = True - oldest_article = 3 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - - # rules for gazeta.pl - preprocess_regexps = [ - (re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] - keep_only_tags = [dict(id='gazeta_article')] - remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict( - attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})] - remove_tags_after = dict(id='gazeta_article_body') - - feeds = [ - (u'Wiadomości', u'http://rss.feedsportal.com/c/32739/f/530239/index.rss')] - - def print_version(self, url): - if 'feedsportal.com' in url: - s = url.rpartition('gazeta0Bpl') - u = s[2] - if not s[0]: - u = url.rpartition('wyborcza0Bpl')[2] - u = u.replace('/l/', '/') - u = u.replace('/ia1.htm', '') - u = u.replace('0Dbo0F1', '') - u = u.replace('/story01.htm', '') - u = u.replace('0C', '/') - u = u.replace('A', '') - u = u.replace('0E', '-') - u = u.replace('0H', ',') - u = u.replace('0I', '_') - u = u.replace('0B', '.') - u = self.INDEX + u - return u - else: - return url - - def preprocess_html(self, soup): - tag = soup.find(id='Str') - if soup.find(attrs={'class': 'piano_btn_1'}): - return None - elif tag and tag.findAll('a'): - self.append_page(soup, soup.body) - return soup - - def append_page(self, soup, appendtag): - tag = soup.find('div', attrs={'id': 'Str'}) - try: - baseurl = soup.find(name='meta', attrs={ - 'property': 'og:url'})['content'] - except: - return 1 - link = tag.findAll('a')[-1] - while link: - soup2 = self.index_to_soup(baseurl + link['href']) - link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1] - if u'następne' not in link.string: - link = '' - pagetext = soup2.find(id='artykul') - comments = pagetext.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - tag.extract() - - def image_url_processor(self, baseurl, url): - if url.startswith(' '): - return url.strip() - else: - return url diff --git a/recipes/gazeta_pl_szczecin.recipe b/recipes/gazeta_pl_szczecin.recipe deleted file mode 100644 index 9dd53619d1..0000000000 --- a/recipes/gazeta_pl_szczecin.recipe +++ /dev/null @@ -1,90 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -import re - -from calibre.ebooks.BeautifulSoup import Comment -from calibre.web.feeds.news import BasicNewsRecipe - - -class GazetaPlSzczecin(BasicNewsRecipe): - title = u'Gazeta Wyborcza Szczecin' - description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.' - __author__ = u'Michał Szkutnik' - __license__ = u'GPL v3' - language = 'pl' - publisher = 'Agora S.A.' - category = 'news, szczecin' - INDEX = 'http://szczecin.gazeta.pl' - cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif' - remove_empty_feeds = True - oldest_article = 3 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - - # rules for gazeta.pl - preprocess_regexps = [ - (re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] - keep_only_tags = [dict(id='gazeta_article')] - remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict( - attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})] - remove_tags_after = dict(id='gazeta_article_body') - feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')] - - def print_version(self, url): - if 'feedsportal.com' in url: - s = url.rpartition('gazeta0Bpl') - u = s[2] - if not s[0]: - u = url.rpartition('wyborcza0Bpl')[2] - u = u.replace('/l/', '/') - u = u.replace('/ia1.htm', '') - u = u.replace('/story01.htm', '') - u = u.replace('0C', '/') - u = u.replace('A', '') - u = u.replace('0E', '-') - u = u.replace('0H', ',') - u = u.replace('0I', '_') - u = u.replace('0B', '.') - u = self.INDEX + u - return u - else: - return url - - def preprocess_html(self, soup): - tag = soup.find(id='Str') - if soup.find(attrs={'class': 'piano_btn_1'}): - return None - elif tag and tag.findAll('a'): - self.append_page(soup, soup.body) - return soup - - def append_page(self, soup, appendtag): - tag = soup.find('div', attrs={'id': 'Str'}) - try: - baseurl = soup.find(name='meta', attrs={ - 'property': 'og:url'})['content'] - except: - return 1 - link = tag.findAll('a')[-1] - while link: - soup2 = self.index_to_soup(baseurl + link['href']) - link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1] - if u'następne' not in link.string: - link = '' - pagetext = soup2.find(id='artykul') - comments = pagetext.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - tag.extract() - - def image_url_processor(self, baseurl, url): - if url.startswith(' '): - return url.strip() - else: - return url diff --git a/recipes/gazeta_pomorska.recipe b/recipes/gazeta_pomorska.recipe deleted file mode 100644 index 0a4d2adebb..0000000000 --- a/recipes/gazeta_pomorska.recipe +++ /dev/null @@ -1,72 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class GazetaPomorska(BasicNewsRecipe): - title = u'Gazeta Pomorska' - __author__ = 'Richard z forum.eksiazki.org, fenuks' - description = u'Gazeta Pomorska - portal regionalny' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - extra_css = 'ul {list-style: none; padding:0; margin:0;}' - INDEX = 'http://www.pomorska.pl' - masthead_url = INDEX + '/images/top_logo.png' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - - feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'), - (u'Region', u'http://www.pomorska.pl/region.xml'), - (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), - (u'Nakło', u'http://www.pomorska.pl/naklo.xml'), - (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'), - (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'), - (u'Grudziądz', u'http://www.pomorska.pl/grudziadz.xml'), - (u'Inowrocław', u'http://www.pomorska.pl/inowroclaw.xml'), - (u'Toruń', u'http://www.pomorska.pl/torun.xml'), - (u'Włocławek', u'http://www.pomorska.pl/wloclawek.xml'), - (u'Aleksandrów Kujawski', - u'http://www.pomorska.pl/aleksandrow.xml'), - (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'), - (u'Chełmno', u'http://www.pomorska.pl/chelmno.xml'), - (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'), - (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'), - (u'Golub-Dobrzyń', u'http://www.pomorska.pl/golubdobrzyn.xml'), - (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'), - (u'Radziejów', u'http://www.pomorska.pl/radziejow.xml'), - (u'Rypin', u'http://www.pomorska.pl/rypin.xml'), - (u'Sępólno', u'http://www.pomorska.pl/sepolno.xml'), - (u'Świecie', u'http://www.pomorska.pl/swiecie.xml'), - (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'), - (u'Żnin', u'http://www.pomorska.pl/znin.xml'), - (u'Sport', u'http://www.pomorska.pl/sport.xml'), - (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'), - (u'Auto', u'http://www.pomorska.pl/moto.xml'), - (u'Dom', u'http://www.pomorska.pl/dom.xml'), - # (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'), - (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')] - - keep_only_tags = [dict(id='article')] - - def get_cover_url(self): - soup = self.index_to_soup( - self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') - nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] - soup = self.index_to_soup(nexturl) - self.cover_url = self.INDEX + \ - soup.find(id='cover').find(name='img')['src'] - return getattr(self, 'cover_url', self.cover_url) - - def decode_feedportal_url(self, url): - link = url.rpartition('l/0L0S')[2][:-12] - replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), - ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) - for t in replaces: - link = link.replace(*t) - return 'http://' + link - - def print_version(self, url): - return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/gazeta_wroclawska.recipe b/recipes/gazeta_wroclawska.recipe deleted file mode 100644 index e4935d2511..0000000000 --- a/recipes/gazeta_wroclawska.recipe +++ /dev/null @@ -1,46 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class GazetaWroclawska(BasicNewsRecipe): - title = u'Gazeta Wroc\u0142awska' - __author__ = 'fenuks' - description = u'Gazeta Regionalna Gazeta Wrocławska. Najnowsze Wiadomości Wrocław, Informacje Wrocław. Czytaj!' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetawroclawska.png?24' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'}) - remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={ - 'class': 'czytajDalej'}), dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'})] - - feeds = [ - (u'Fakty24', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533775/index.rss?201302'), - (u'Region', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_region.xml?201302'), - (u'Kultura', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533777/index.rss?201302'), - (u'Sport', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533776/index.rss?201302'), - (u'Z archiwum', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_zarchiwum.xml?201302'), - - (u'M\xf3j reporter', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_mojreporter.xml?201302'), - (u'Historia', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_historia.xml?201302'), - (u'Listy do redakcji', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_listydoredakcji.xml?201302'), - (u'Na drogach', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_nadrogach.xml?201302')] - - def print_version(self, url): - return url.replace('artykul', 'drukuj') - - def skip_ad_pages(self, soup): - if 'Advertisement' in soup.title: - nexturl = soup.find('a')['href'] - return self.index_to_soup(nexturl, raw=True) - - def get_cover_url(self): - soup = self.index_to_soup( - 'http://www.prasa24.pl/gazeta/gazeta-wroclawska/') - self.cover_url = soup.find(id='pojemnik').img['src'] - return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/gazeta_wspolczesna.recipe b/recipes/gazeta_wspolczesna.recipe deleted file mode 100644 index 495c310e50..0000000000 --- a/recipes/gazeta_wspolczesna.recipe +++ /dev/null @@ -1,68 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class GazetaWspolczesna(BasicNewsRecipe): - title = u'Gazeta Wsp\xf3\u0142czesna' - __author__ = 'fenuks' - description = u'Gazeta Współczesna - portal regionalny.' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - extra_css = 'ul {list-style: none; padding:0; margin:0;}' - INDEX = 'http://www.wspolczesna.pl' - masthead_url = INDEX + '/images/top_logo.png' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - - feeds = [ - (u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'), - (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'), - (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'), - (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'), - (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'), - (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'), - (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'), - (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'), - (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'), - (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'), - (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'), - (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'), - (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'), - (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'), - (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'), - (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'), - (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'), - (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'), - (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'), - (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'), - (u'Sport', u'http://www.wspolczesna.pl/sport.xml'), - (u'Praca', u'http://www.wspolczesna.pl/praca.xml'), - (u'Dom', u'http://www.wspolczesna.pl/dom.xml'), - (u'Auto', u'http://www.wspolczesna.pl/auto.xml'), - (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml')] - - keep_only_tags = [dict(id='article')] - - def get_cover_url(self): - soup = self.index_to_soup( - self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') - nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] - soup = self.index_to_soup(nexturl) - self.cover_url = self.INDEX + \ - soup.find(id='cover').find(name='img')['src'] - return getattr(self, 'cover_url', self.cover_url) - - def decode_feedportal_url(self, url): - link = url.rpartition('l/0L0S')[2][:-12] - replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), - ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) - for t in replaces: - link = link.replace(*t) - return 'http://' + link - - def print_version(self, url): - return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe deleted file mode 100644 index 9edcee6fb2..0000000000 --- a/recipes/gazeta_wyborcza.recipe +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -import re - -from calibre.ebooks.BeautifulSoup import Comment -from calibre.web.feeds.news import BasicNewsRecipe - - -class Gazeta_Wyborcza(BasicNewsRecipe): - title = u'Gazeta Wyborcza' - __author__ = 'fenuks, Artur Stachecki' - language = 'pl' - description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.' - category = 'newspaper' - publication_type = 'newspaper' - # encoding = 'iso-8859-2' - masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg' - INDEX = 'http://wyborcza.pl' - remove_empty_feeds = True - oldest_article = 3 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - - # rules for gazeta.pl - preprocess_regexps = [ - (re.compile(u'Czytaj więcej.*', re.DOTALL), lambda m: '')] - keep_only_tags = [dict(id='gazeta_article')] - remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict( - attrs={'class': ['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})] - remove_tags_after = dict(id='gazeta_article_body') - - # rules for wyborcza.biz - preprocess_regexps.append((re.compile( - u'(
)?(
)? Czytaj (także|też):.*?\\.?
', re.DOTALL), lambda m: '')) - - feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), - (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'), - (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'), - (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'), - (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'), - (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), - (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), - (u'Gazeta \u015awi\u0105teczna', - u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), - (u'Du\u017cy Format', - u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), - (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), - (u'M\u0119ska Muzyka', - u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), - (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), - (u'Solidarni z Tybetem', - u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), - (u'W pon. - \u017bakowski', - u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), - (u'We wt. - Kolenda-Zalewska', - u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), - (u'\u015aroda w \u015brod\u0119', - u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), - (u'W pi\u0105tek - Olejnik', - u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), - (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss') - ] - - def print_version(self, url): - if 'feedsportal.com' in url: - s = url.rpartition('wyborcza0Bpl') - u = s[2] - if not s[0]: - u = url.rpartition('gazeta0Bpl')[2] - u = u.replace('/l/', '/') - u = u.replace('/ia1.htm', '') - u = u.replace('/story01.htm', '') - u = u.replace('0C', '/') - u = u.replace('A', '') - u = u.replace('0E', '-') - u = u.replace('0H', ',') - u = u.replace('0I', '_') - u = u.replace('0B', '.') - u = self.INDEX + u - return u - else: - return url - - def preprocess_html(self, soup): - tag = soup.find(id='Str') - if soup.find(attrs={'class': 'piano_btn_1'}): - return None - elif tag and tag.findAll('a'): - self.append_page(soup, soup.body) - return soup - - def append_page(self, soup, appendtag): - tag = soup.find('div', attrs={'id': 'Str'}) - try: - baseurl = soup.find(name='meta', attrs={ - 'property': 'og:url'})['content'] - except: - return 1 - link = tag.findAll('a')[-1] - while link: - soup2 = self.index_to_soup(baseurl + link['href']) - link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1] - if u'następne' not in link.string: - link = '' - pagetext = soup2.find(id='artykul') - comments = pagetext.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - tag.extract() - - def get_cover_url(self): - soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html') - cover = soup.find(attrs={'class': 'gallerycontent'}) - self.cover_url = cover.ul.li.a.img['src'].replace('P.jpg', '.jpg') - return getattr(self, 'cover_url', self.cover_url) - - def image_url_processor(self, baseurl, url): - if url.startswith(' '): - return url.strip() - else: - return url diff --git a/recipes/gcn.recipe b/recipes/gcn.recipe deleted file mode 100644 index f8522f98cb..0000000000 --- a/recipes/gcn.recipe +++ /dev/null @@ -1,64 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class GCN(BasicNewsRecipe): - title = u'Gazeta Codziennej Nowiny' - __author__ = 'fenuks' - description = u'nowiny24.pl - portal regionalny województwa podkarpackiego.' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - extra_css = 'ul {list-style: none; padding:0; margin:0;}' - INDEX = 'http://www.nowiny24.pl' - masthead_url = INDEX + '/images/top_logo.png' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - ignore_duplicate_articles = {'title', 'url'} - remove_attributes = ['style'] - use_embedded_content = False - - feeds = [(u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'), - (u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'), - (u'Bieszczady', u'http://www.nowiny24.pl/bieszczady.xml'), - (u'Rzeszów', u'http://www.nowiny24.pl/rzeszow.xml'), - (u'Przemyśl', u'http://www.nowiny24.pl/przemysl.xml'), - (u'Leżajsk', u'http://www.nowiny24.pl/lezajsk.xml'), - (u'Łańcut', u'http://www.nowiny24.pl/lancut.xml'), - (u'Dębica', u'http://www.nowiny24.pl/debica.xml'), - (u'Jarosław', u'http://www.nowiny24.pl/jaroslaw.xml'), - (u'Krosno', u'http://www.nowiny24.pl/krosno.xml'), - (u'Mielec', u'http://www.nowiny24.pl/mielec.xml'), - (u'Nisko', u'http://www.nowiny24.pl/nisko.xml'), - (u'Sanok', u'http://www.nowiny24.pl/sanok.xml'), - (u'Stalowa Wola', u'http://www.nowiny24.pl/stalowawola.xml'), - (u'Tarnobrzeg', u'http://www.nowiny24.pl/tarnobrzeg.xml'), - (u'Sport', u'http://www.nowiny24.pl/sport.xml'), - (u'Dom', u'http://www.nowiny24.pl/dom.xml'), - (u'Auto', u'http://www.nowiny24.pl/auto.xml'), - (u'Praca', u'http://www.nowiny24.pl/praca.xml'), - (u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'), - (u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml')] - - keep_only_tags = [dict(id='article')] - - def get_cover_url(self): - soup = self.index_to_soup( - self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') - nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] - soup = self.index_to_soup(nexturl) - self.cover_url = self.INDEX + \ - soup.find(id='cover').find(name='img')['src'] - return getattr(self, 'cover_url', self.cover_url) - - def decode_feedportal_url(self, url): - link = url.rpartition('l/0L0S')[2][:-12] - replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), - ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) - for t in replaces: - link = link.replace(*t) - return 'http://' + link - - def print_version(self, url): - return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/geek_poke.recipe b/recipes/geek_poke.recipe deleted file mode 100644 index 027532b1f3..0000000000 --- a/recipes/geek_poke.recipe +++ /dev/null @@ -1,80 +0,0 @@ -import re - -from calibre.utils.magick import Image, create_canvas -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1307556816(BasicNewsRecipe): - title = u'Geek and Poke' - __author__ = u'DrMerry' - description = u'Geek and Poke Cartoons' - publisher = u'Oliver Widder' - author = u'Oliver Widder, DrMerry (calibre-code), calibre' - oldest_article = 31 - max_articles_per_feed = 100 - language = u'en' - simultaneous_downloads = 1 - timefmt = ' [%a, %d %B, %Y]' - summary_length = -1 - no_stylesheets = True - category = 'News.IT, Cartoon, Humor, Geek' - use_embedded_content = False - cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg' - remove_javascript = True - remove_empty_feeds = True - publication_type = 'blog' - masthead_url = None - conversion_options = { - 'comments': '', 'tags': category, 'language': language, 'publisher': publisher, 'author': author - } - - remove_tags_before = dict(name='p', attrs={'class': 'content-nav'}) - remove_tags_after = dict(name='div', attrs={'class': 'entry-content'}) - remove_tags = [dict(name='div', attrs={'class': 'entry-footer'}), - dict(name='div', attrs={'id': 'alpha'}), - dict(name='div', attrs={'id': 'gamma'}), - dict(name='iframe'), - dict(name='p', attrs={'class': 'content-nav'})] - - filter_regexps = [(r'feedburner\.com'), - (r'pixel.quantserve\.com'), - (r'googlesyndication\.com'), - (r'yimg\.com'), - (r'scorecardresearch\.com')] - - preprocess_regexps = [ - (re.compile(r'(

( |\s)*

|]*>Tweet|]*>|||]*>[^<]*[^<]*)', re.DOTALL | re.IGNORECASE), lambda match: ''), - (re.compile(r'( |\s\s)+\s*', re.DOTALL | - re.IGNORECASE), lambda match: ' '), - (re.compile(r'(]*>)]>((?!', re.DOTALL | - re.IGNORECASE), lambda match: match.group(1) + match.group(2) + ''), - (re.compile(r'(]*alt="([^"]*)"[^>]*>)', re.DOTALL | re.IGNORECASE), - lambda match: '
' + match.group(2) + '
' + match.group(1) + '
'), - (re.compile(r'()+', re.DOTALL | - re.IGNORECASE), lambda match: '
'), - ] - - extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}' - - def postprocess_html(self, soup, first): - for tag in soup.findAll('img', src=True): - iurl = tag['src'] - img = Image() - img.open(iurl) - # print '***img is: ', iurl, '\n****width is: ', width, 'height is: - # ', height - img.trim(0) - # print '***TRIMMED img width is: ', width, 'height is: ', height - left = 0 - top = 0 - border_color = '#ffffff' - width, height = img.size - # print '***retrieved img width is: ', width, 'height is: ', height - height_correction = 1.17 - canvas = create_canvas( - width, height * height_correction, border_color) - canvas.compose(img, left, top) - canvas.save(iurl) - return soup - - feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml'] diff --git a/recipes/gentoftelokalavisen_dk.recipe b/recipes/gentoftelokalavisen_dk.recipe deleted file mode 100644 index 1b023c11ca..0000000000 --- a/recipes/gentoftelokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Villabyerne -''' - - -class GentofteLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Villabyerne' - description = 'Lokale og regionale nyheder, sport og kultur fra Gentofte, Hellerup og Charlottenlund på gentofte.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Villabyerne', 'http://gentofte.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/german_gov.recipe b/recipes/german_gov.recipe deleted file mode 100644 index 21fe87df16..0000000000 --- a/recipes/german_gov.recipe +++ /dev/null @@ -1,31 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class GermanGovernmentPress(BasicNewsRecipe): - title = u'Pressemitteilungen der Bundesregierung' - oldest_article = 14 - __author__ = 'malfi' - max_articles_per_feed = 100 - no_stylesheets = True - cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif' - language = 'de' - keep_only_tags = [] - keep_only_tags.append(dict(name='h2')) - keep_only_tags.append(dict(name='div', attrs={'class': 'textblack'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'subtitle'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'text'})) - remove_tags = [] - feeds = [ - (u'Pressemitteilungen', u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf')] # noqa - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' - - def print_version(self, url): - m = re.search(r'^(.*).html$', url) - return str(m.group(1)) + ',layoutVariant=Druckansicht.html' diff --git a/recipes/gezgin_dergi.recipe b/recipes/gezgin_dergi.recipe deleted file mode 100644 index 9dff4e0cae..0000000000 --- a/recipes/gezgin_dergi.recipe +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1390492898(BasicNewsRecipe): - title = u'Gezgin Dergi' - __author__ = 'asalet_r' - language = 'tr' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [(u'Gezgin Dergi', u'http://www.gezgindergi.com/feed/')] diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe deleted file mode 100644 index dc349ee206..0000000000 --- a/recipes/gildia_pl.recipe +++ /dev/null @@ -1,72 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Gildia(BasicNewsRecipe): - title = u'Gildia.pl' - __author__ = 'fenuks' - description = u'Fantastyczny Portal Kulturalny - newsy, recenzje, galerie, wywiady. Literatura, film, gry komputerowe i planszowe, komiks, RPG, sklep. Nie lekceważ potęgi wyobraźni!' # noqa - cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg' - category = 'culture' - cover_url = 'http://portal.gildia.pl/images/logo-main.png' - language = 'pl' - oldest_article = 8 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - preprocess_regexps = [(re.compile(u''), lambda match: '')] - ignore_duplicate_articles = {'title', 'url'} - remove_tags = [dict(name='div', attrs={'class': [ - 'backlink', 'im_img', 'addthis_toolbox addthis_default_style', 'banner-bottom']})] - keep_only_tags = [dict(name='div', attrs={'class': 'widetext'}), dict(name='article', attrs={'id': re.compile(r'post-\d+')})] - feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), - (u'Literatura', u'http://www.literatura.gildia.pl/rss'), - (u'Film', u'http://www.film.gildia.pl/rss'), - (u'Horror', u'http://www.horror.gildia.pl/rss'), - (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), - (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), - (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), - (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), - (u'Techno', u'http://www.techno.gildia.pl/rss'), - (u'Historia', u'http://www.historia.gildia.pl/rss'), - (u'Magia', u'http://www.magia.gildia.pl/rss'), - (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), - (u'RPG', u'http://www.rpg.gildia.pl/rss'), - (u'LARP', u'http://www.larp.gildia.pl/rss'), - (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), - (u'Nauka', u'http://www.nauka.gildia.pl/rss'), - ] - - def skip_ad_pages(self, soup): - content = soup.find('div', attrs={'class': 'news'}) - if content is None: - return - - words = ('recenzj', 'zapowied', 'fragmen', - 'relacj', 'wywiad', 'nominacj') - document_title = soup.title.renderContents().decode('utf-8').lower() - for word in words: - if word in document_title: - for link in content.findAll(name='a'): - if word in link['href'] or (link.string and word in link.string): - return self.index_to_soup(link['href'], raw=True) - for tag in content.findAll(name='a', href=re.compile('/publicystyka/')): - if 'Więcej...' == tag.string: - return self.index_to_soup(tag['href'], raw=True) - - def preprocess_html(self, soup): - title = soup.title.renderContents().decode('utf-8').lower() - for a in soup('a', href=True): - if not a['href'].startswith('http'): - if '/gry/' in a['href']: - a['href'] = 'http://www.gry.gildia.pl' + a['href'] - elif u'książk' in title or u'komiks' in title: - a['href'] = 'http://www.literatura.gildia.pl' + a['href'] - elif u'komiks' in title: - a['href'] = 'http://www.literatura.gildia.pl' + a['href'] - else: - a['href'] = 'http://www.gildia.pl' + a['href'] - return soup diff --git a/recipes/gizmodo.recipe b/recipes/gizmodo.recipe deleted file mode 100644 index 01430e87d7..0000000000 --- a/recipes/gizmodo.recipe +++ /dev/null @@ -1,36 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -gizmodo.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Gizmodo(BasicNewsRecipe): - title = 'Gizmodo' - __author__ = 'Darko Miletic' - description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural." - publisher = 'gizmodo.com' - category = 'news, IT, Internet, gadgets' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = True - language = 'en' - masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/vip?format=xml')] - - remove_tags = [ - {'class': 'feedflare'}, - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/glamour.recipe b/recipes/glamour.recipe deleted file mode 100644 index d53edb4176..0000000000 --- a/recipes/glamour.recipe +++ /dev/null @@ -1,36 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1305547242(BasicNewsRecipe): - title = u'Glamour (US)' - oldest_article = 21 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - language = 'en' - remove_javascript = True - __author__ = 'Anonymous' - auto_cleanup = True - - feeds = [ - (u'All Fashion', - u'http://feeds.glamour.com/glamour/all_fashion'), - (u'All Beauty', - u'http://feeds.glamour.com/glamour/all_beauty'), - (u'All Sex, Love & Life', - u'http://feeds.glamour.com/glamour/sex_love_life'), - (u'All Health & Fitness', - u'http://feeds.glamour.com/glamour/health_fitness'), - (u'Slaves to Fashion blog', - u'http://feeds.glamour.com/glamour/slavestofashion'), - (u'The Girls in the Beauty Department', - u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'), - (u'Smitten blog', - u'http://feeds.glamour.com/glamour/smitten'), - (u'Save the Date', - u'http://feeds.feedburner.com/glamour/save-the-date'), - (u'Save the Date', - u'http://feeds.feedburner.com/glamour/save-the-date'), - (u'Vitamin G blog', - u'http://feeds.glamour.com/glamour/vitamin-g'), - ] diff --git a/recipes/glennbeck.recipe b/recipes/glennbeck.recipe deleted file mode 100644 index 3ce2362812..0000000000 --- a/recipes/glennbeck.recipe +++ /dev/null @@ -1,100 +0,0 @@ -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Comment, Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class GlennBeckRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en' - version = 1 - - title = u'Glenn Beck' - publisher = u'Premiere Radio Networks' - category = u'News, Opinion' - description = u'The fusion of entertainment and enlightenment' - - oldest_article = 7 - max_articles_per_feed = 100 - - no_stylesheets = True - remove_javascript = True - use_embedded_content = False - - feeds = [(u'Glenn Beck', u'http://feeds.feedburner.com/GlennBeckArticles')] - - def preprocess_html(self, soup): - # Their html is horribly broken; if we search for the div that has the content BeatifulSoup returns the div with only the headline and no content. - # This is due to illegal nesting of tags. So we do it the hard way. - - # We can find this one, and we don't want it. - div = soup.find('div', attrs={'id': 'extraInfo'}) - if div: - div.extract() - - # Don't want these either. - iframes = soup.findAll('iframe') - [iframe.extract() for iframe in iframes] - - # Get empty document. - freshSoup = self.getFreshSoup() - - # This is the broken div; but we can find the headline. - newsDiv = soup.find('div', attrs={'class': 'news-detail'}) - if newsDiv: - if newsDiv.h1: - freshSoup.body.append(newsDiv.h1) - - # The content is wrapped in

tags, most of the time anyway. - counter = 0 - for p in soup.findAll('p'): - if p.get('class') == 'smalltextwhite': - # But we don't want this one. - continue - - freshSoup.body.append(p) - counter += 1 - - # Debugging block - - # In some articles the content is not wrapped in

tags. In that case the counter is low. - # 2 is the magic number that seems to work. - if counter <= 2: - # So they are playing hard-to-get: first throw out all comments. - comments = soup.findAll( - text=lambda text: isinstance(text, Comment)) - [comment.extract() for comment in comments] - - # Find all unwrapped strings. - for txt in soup.findAll(text=True): - raw = txt.strip() - # Debugging line - - if (txt.parent.name == 'body' and len(raw) > 0) and not (len(raw) == 6 and raw == ' '): - # This is our content; ignore the rest. - para = new_tag(freshSoup, 'p') - para.append(raw) - freshSoup.body.append(para) - counter += 1 - - # Now if the counter is still 0 or 1 they did something completely - # different and we still have an empty article. In a last attempt, - # add the whole content div, just in case. - if counter < 2: - freshSoup.body.append(newsDiv) - - # Debugging block - - return freshSoup - - def getFreshSoup(self, title=None): - if title: - return BeautifulSoup('' + str(title) + '') - else: - return BeautifulSoup('') diff --git a/recipes/glos_wielkopolski.recipe b/recipes/glos_wielkopolski.recipe deleted file mode 100644 index 8bd140f1b2..0000000000 --- a/recipes/glos_wielkopolski.recipe +++ /dev/null @@ -1,45 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class GlosWielkopolski(BasicNewsRecipe): - title = u'G\u0142os Wielkopolski' - __author__ = 'fenuks' - description = u'Gazeta Regionalna Głos Wielkopolski. Najnowsze Wiadomości Poznań. Czytaj Informacje Poznań!' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gloswielkopolski.png?24' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - use_embedded_content = False - ignore_duplicate_articles = {'title', 'url'} - remove_tags_after = dict(attrs={'src': 'http://nm.dz.com.pl/dz.png'}) - remove_tags = [dict(id='mat-podobne'), dict(name='a', attrs={'class': 'czytajDalej'}), dict(attrs={ - 'src': 'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href', 'http://www.gloswielkopolski.pl/newsletter/'})] - - feeds = [ - (u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), - (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), - (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), - (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), - (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), - (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), - (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), - (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), - (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')] - - def print_version(self, url): - return url.replace('artykul', 'drukuj') - - def skip_ad_pages(self, soup): - if 'Advertisement' in soup.title: - nexturl = soup.find('a')['href'] - return self.index_to_soup(nexturl, raw=True) - - def get_cover_url(self): - soup = self.index_to_soup( - 'http://www.prasa24.pl/gazeta/glos-wielkopolski/') - self.cover_url = soup.find(id='pojemnik').img['src'] - return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/go4it.recipe b/recipes/go4it.recipe deleted file mode 100644 index 38bc9619c5..0000000000 --- a/recipes/go4it.recipe +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -go4it.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Go4ITro(BasicNewsRecipe): - title = u'go4it' - __author__ = u'Silviu Cotoar\u0103' - description = 'Gadgeturi, Lifestyle, Tehnologie' - publisher = 'go4it' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Reviste,Ziare,IT' - encoding = 'utf-8' - cover_url = 'http://www.go4it.ro/images/logo.png' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'subTitle clearfix'}), dict( - name='div', attrs={'class': 'story'}) - ] - - remove_tags = [ - dict(name='span', attrs={'class': ['data']}), dict( - name='a', attrs={'class': ['comments']}) - ] - - feeds = [ - (u'Feeds', u'http://feeds2.feedburner.com/Go4itro-Stiri') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/goal.recipe b/recipes/goal.recipe deleted file mode 100644 index f8300ea508..0000000000 --- a/recipes/goal.recipe +++ /dev/null @@ -1,13 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1325677767(BasicNewsRecipe): - title = u'Goal' - oldest_article = 1 - language = 'it' - max_articles_per_feed = 100 - auto_cleanup = True - remove_tags_after = [dict(id='article_content')] - feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')] - __author__ = 'faber1971' - description = 'Sports news from Italy' diff --git a/recipes/gofin_pl.recipe b/recipes/gofin_pl.recipe deleted file mode 100644 index a4361245c3..0000000000 --- a/recipes/gofin_pl.recipe +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = 'teepel ' - -''' -gofin.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class gofin(BasicNewsRecipe): - title = u'Gofin' - __author__ = 'teepel ' - language = 'pl' - description = u'Portal Podatkowo-Księgowy' - INDEX = 'http://gofin.pl' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - simultaneous_downloads = 5 - remove_javascript = True - no_stylesheets = True - auto_cleanup = True - - feeds = [ - (u'Podatki', u'http://www.rss.gofin.pl/podatki.xml'), - (u'Prawo Pracy', u'http://www.rss.gofin.pl/prawopracy.xml'), - (u'Rachunkowo\u015b\u0107', u'http://www.rss.gofin.pl/rachunkowosc.xml'), - (u'Sk\u0142adki, zasi\u0142ki, emerytury', u'http://www.rss.gofin.pl/zasilki.xml'), - (u'Firma', u'http://www.rss.gofin.pl/firma.xml'), - (u'Prawnik radzi', u'http://www.rss.gofin.pl/prawnikradzi.xml')] diff --git a/recipes/good_to_know.recipe b/recipes/good_to_know.recipe deleted file mode 100644 index cd35d03db3..0000000000 --- a/recipes/good_to_know.recipe +++ /dev/null @@ -1,34 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1305547242(BasicNewsRecipe): - title = u'Good to Know (uk)' - oldest_article = 14 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - remove_javascript = True - __author__ = 'Anonymous' - language = 'en_GB' - remove_tags = [ - dict(name='div', attrs={'class': ['articles_footer', 'printoptions']})] - - def print_version(self, url): - return url + '/print/1' - - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup - - feeds = [ (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'), - (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'), - (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'), - (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'), - (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'), - (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'), - (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'), - (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'), - ] diff --git a/recipes/googlemobileblog.recipe b/recipes/googlemobileblog.recipe deleted file mode 100644 index abd8705866..0000000000 --- a/recipes/googlemobileblog.recipe +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1318572445(BasicNewsRecipe): - title = u'Google Mobile Blog' - language = 'en' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [(u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml')] diff --git a/recipes/grenaalokalavisen_dk.recipe b/recipes/grenaalokalavisen_dk.recipe deleted file mode 100644 index 7ba89f98c7..0000000000 --- a/recipes/grenaalokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Grenaa -''' - - -class GrenaaLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Grenaa' - description = 'Lokale og regionale nyheder, sport, kultur fra Grenå og omegn på grenaa.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Grenaa', 'http://grenaa.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/gribskovlokalavisen_dk.recipe b/recipes/gribskovlokalavisen_dk.recipe deleted file mode 100644 index d800f7aba5..0000000000 --- a/recipes/gribskovlokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Ugeposten Gribskov -''' - - -class GribskovLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Ugeposten Gribskov' - description = 'Lokale og regionale nyheder, sport og kultur fra Gribskov og omegn på gribskov.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Ugeposten Gribskov', 'http://gribskov.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/grid_to.recipe b/recipes/grid_to.recipe deleted file mode 100644 index 6645225047..0000000000 --- a/recipes/grid_to.recipe +++ /dev/null @@ -1,81 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheGrid(BasicNewsRecipe): - #: The title to use for the ebook - title = u'The Grid' - - #: A couple of lines that describe the content this recipe downloads. - #: This will be used primarily in a GUI that presents a list of recipes. - description = (u'The Grid is a weekly city magazine and daily website providing a fresh, ' - 'accessible voice for Toronto.') - - #: The author of this recipe - __author__ = u'Yusuf W' - - #: The language that the news is in. Must be an ISO-639 code either - #: two or three characters long - language = 'en_CA' - - #: Publication type - #: Set to newspaper, magazine or blog - publication_type = 'newspaper' - - #: Convenient flag to disable loading of stylesheets for websites - #: that have overly complex stylesheets unsuitable for conversion - #: to ebooks formats - #: If True stylesheets are not downloaded and processed - no_stylesheets = True - - #: List of tags to be removed. Specified tags are removed from downloaded HTML. - remove_tags_before = dict(name='div', id='content') - remove_tags_after = dict(name='div', id='content') - remove_tags = [ - dict(name='div', attrs={'class': 'right-content pull-right'}), - dict(name='div', attrs={'class': 'right-content'}), - dict(name='div', attrs={'class': 'ftr-line'}), - dict(name='div', attrs={'class': 'pull-right'}), - dict(name='div', id='comments'), - dict(name='div', id='tags') - ] - - #: Keep only the specified tags and their children. - # keep_only_tags = [dict(name='div', id='content')] - - cover_margins = (0, 0, '#ffffff') - - INDEX = 'http://www.thegridto.com' - - def get_cover_url(self): - soup = self.index_to_soup(self.INDEX) - cover_url = soup.find( - attrs={'class': 'article-block latest-issue'}).find('img')['src'] - - return cover_url - - def parse_index(self): - - # Get the latest issue - soup = self.index_to_soup(self.INDEX) - a = soup.find( - 'div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2] - - # Parse the index of the latest issue - self.INDEX = self.INDEX + a['href'] - soup = self.index_to_soup(self.INDEX) - - feeds = [] - for section in ['city', 'life', 'culture']: - section_class = 'left-content article-listing ' + section + ' pull-left' - div = soup.find(attrs={'class': section_class}) - - articles = [] - for a in div.findAll(attrs={'class': 'post-title'}): - title = self.tag_to_string(a) - url = a['href'] - - articles.append({'title': title, 'url': url, - 'description': '', 'date': ''}) - - feeds.append((section, articles)) - return feeds diff --git a/recipes/grrm.recipe b/recipes/grrm.recipe deleted file mode 100644 index 5acb66a046..0000000000 --- a/recipes/grrm.recipe +++ /dev/null @@ -1,32 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2011, Darko Miletic ' -''' -grrm.livejournal.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class NotABlog(BasicNewsRecipe): - title = 'Not A Blog - George R.R. Martin' - __author__ = 'Darko Miletic' - description = 'George R.R. Martin' - oldest_article = 15 - max_articles_per_feed = 100 - language = 'en' - encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = True - publication_type = 'blog' - - conversion_options = { - 'comment': description, 'tags': 'sf, fantasy, game of thrones', 'publisher': 'George R.R. Martin', 'language': language - } - - feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) diff --git a/recipes/gs24_pl.recipe b/recipes/gs24_pl.recipe deleted file mode 100644 index 9f24b1e66f..0000000000 --- a/recipes/gs24_pl.recipe +++ /dev/null @@ -1,46 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -import re -import string - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1322322819(BasicNewsRecipe): - title = u'GS24.pl (Głos Szczeciński)' - description = u'Internetowy serwis Głosu Szczecińskiego' - __author__ = u'Michał Szkutnik' - __license__ = u'GPL v3' - language = 'pl' - publisher = 'Media Regionalne sp. z o.o.' - category = 'news, szczecin' - oldest_article = 2 - max_articles_per_feed = 100 - auto_cleanup = True - cover_url = "http://www.gs24.pl/images/top_logo.png" - - feeds = [ - # (u'Wszystko', u'http://www.gs24.pl/rss.xml'), - (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'), - (u'Stargard', u'http://www.gs24.pl/stargard.xml'), - (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'), - (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'), - (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'), - (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'), - (u'Police', u'http://www.gs24.pl/police.xml'), - (u'Region', u'http://www.gs24.pl/region.xml'), - (u'Sport', u'http://www.gs24.pl/sport.xml'), - ] - - def get_article_url(self, article): - s = re.search("""/0L0S(gs24.*)/story01.htm""", article.link) - s = s.group(1) - replacements = {"0B": ".", "0C": "/", - "0H": ",", "0I": "_", "0D": "?", "0F": "="} - for (a, b) in replacements.items(): - s = string.replace(s, a, b) - s = string.replace(s, "0A", "0") - return "http://" + s - - def print_version(self, url): - return url + "&Template=printpicart" diff --git a/recipes/gulfnews.recipe b/recipes/gulfnews.recipe deleted file mode 100644 index 7adbff5182..0000000000 --- a/recipes/gulfnews.recipe +++ /dev/null @@ -1,61 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Darko Miletic ' -''' -gulfnews.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class GulfNews(BasicNewsRecipe): - title = 'Gulf News' - __author__ = 'Darko Miletic' - description = 'News from United Arab Emirrates, persian gulf and rest of the world' - publisher = 'Al Nisr Publishing LLC' - category = 'news, politics, UAE, world' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - publication_type = 'newsportal' - masthead_url = 'http://gulfnews.com/media/img/gulf_news_logo.jpg' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - img{margin-bottom: 0.4em; display:block} - h1{font-family: Georgia, 'Times New Roman', Times, serif} - ol,ul{list-style: none} - .synopsis{font-size: small} - .details{font-size: x-small} - .image{font-size: xx-small} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link', 'object', 'embed']), dict( - attrs={'class': ['quickLinks', 'ratings']}), dict(attrs={'id': 'imageSelector'}) - ] - remove_attributes = ['lang'] - keep_only_tags = [ - dict(name='h1'), dict( - attrs={'class': ['synopsis', 'details', 'image', 'article']}) - ] - - feeds = [ - - (u'UAE News', u'http://gulfnews.com/cmlink/1.446094'), - (u'Business', u'http://gulfnews.com/cmlink/1.446098'), - (u'Entertainment', u'http://gulfnews.com/cmlink/1.446095'), - (u'Sport', u'http://gulfnews.com/cmlink/1.446096'), - (u'Life', u'http://gulfnews.com/cmlink/1.446097') - ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/gulli.recipe b/recipes/gulli.recipe deleted file mode 100644 index 6ce1924ad3..0000000000 --- a/recipes/gulli.recipe +++ /dev/null @@ -1,26 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1259599587(BasicNewsRecipe): - title = u'Gulli' - description = 'News from Germany' - language = 'de' - __author__ = 'posativ' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - - feeds = [(u'gulli:news', u'http://ticker.gulli.com/rss/')] - - remove_tags = [dict(name='div', attrs={'class': ['FloatL', '_forumBox']})] - - keep_only_tags = [dict(name='div', attrs={'id': ['_contentLeft']})] - - remove_tags_after = [dict(name='div', attrs={'class': ['_bookmark']})] - - extra_css = ''' - .byline {color:#666;margin-bottom:0;font-size:12px} - .blockquote {color:#030303;font-style:italic;padding-left:15px;} - img {align:center;} - .li {list-style-type: none} - ''' diff --git a/recipes/h3.recipe b/recipes/h3.recipe deleted file mode 100644 index 022dc7d12a..0000000000 --- a/recipes/h3.recipe +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class H3(BasicNewsRecipe): - title = u'H\xedrszerz\u0151' - oldest_article = 5 - max_articles_per_feed = 50 - language = 'hu' - - __author__ = 'Ezmegaz' - - feeds = [ - (u'Belf\xf6ld', - u'http://www.hirszerzo.hu/rss.belfold.xml'), - (u'K\xfclf\xf6ld', - u'http://www.hirszerzo.hu/rss.kulfold.xml'), - (u'Profit', - u'http://www.hirszerzo.hu/rss.profit.xml'), - (u'Shake', - u'http://www.hirszerzo.hu/rss.shake.xml'), - (u'Publicisztika', - u'http://www.hirszerzo.hu/rss.publicisztika.xml'), - (u'Elemz\xe9s', - u'http://www.hirszerzo.hu/rss.elemzes.xml'), - (u'Sorok k\xf6z\xf6tt', - u'http://www.hirszerzo.hu/rss.sorok_kozott.xml'), - (u'Gal\xe9ria', - u'http://www.hirszerzo.hu/rss.galeria.xml'), - (u'Patro', - u'http://www.hirszerzo.hu/rss.patro.xml')] diff --git a/recipes/haderslevlokalavisen_dk.recipe b/recipes/haderslevlokalavisen_dk.recipe deleted file mode 100644 index acf69c606b..0000000000 --- a/recipes/haderslevlokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Haderslev -''' - - -class HaderslevLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Haderslev' - description = 'Lokale og regionale nyheder, sport, kultur fra Haderslev og omegn på haderslev.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Haderslev', 'http://haderslev.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/hankyoreh.recipe b/recipes/hankyoreh.recipe deleted file mode 100644 index e929f87d5c..0000000000 --- a/recipes/hankyoreh.recipe +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -__copyright__ = '2011, Seongkyoun Yoo ' -''' -Profile to download The Hankyoreh -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Hankyoreh(BasicNewsRecipe): - language = 'ko' - title = u'한겨례' - description = u'The Hankyoreh News articles' - __author__ = 'Seongkyoun Yoo' - oldest_article = 7 - max_articles_per_feed = 10 - no_stylesheets = True - remove_javascript = True - - keep_only_tags = [ - dict(name='div', attrs={'class': ['article-head']}), - dict(name='div', attrs={'class': ['article-text']}), - ] - remove_tags = [ - dict(name='p', attrs={'class': ['category']}), - ] - remove_tags_after = dict(id={'ad_box01'}) - - feeds = [ - (u'정치', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_politics.xml'), - (u'사회', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_society.xml'), - (u'문화', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_culture.xml'), - (u'스포츠', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_sports.xml'), - (u'사설·칼럼', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_opinion.xml'), - (u'만화만평', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_cartoon.xml'), - ] diff --git a/recipes/heavy_metal_it.recipe b/recipes/heavy_metal_it.recipe deleted file mode 100644 index bd23923966..0000000000 --- a/recipes/heavy_metal_it.recipe +++ /dev/null @@ -1,23 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1336289226(BasicNewsRecipe): - title = u'Heavy Metal' - oldest_article = 15 - max_articles_per_feed = 100 - auto_cleanup = False - masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif' - feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')] - keep_only_tags = [ - dict(name='div', attrs={'class': 'entry'}) - ] - remove_tags_after = [ - dict(name='div', attrs={'class': 'sociable'}) - ] - description = 'An Heavy metal Italian magazine' - __author__ = 'faber1971' - language = 'it' - - -__version__ = 'v1.0' -__date__ = '6, May 2012' diff --git a/recipes/heise_open.recipe b/recipes/heise_open.recipe deleted file mode 100644 index 2b8c5a7d1f..0000000000 --- a/recipes/heise_open.recipe +++ /dev/null @@ -1,37 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Anton Gillert ' - -''' -Fetch Heise Open. -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class HeiseOpenDe(BasicNewsRecipe): - - title = 'Heise Open' - description = 'Opensource news from Germany' - __author__ = 'Anton Gillert' - use_embedded_content = False - language = 'de' - timefmt = ' [%d %b %Y]' - max_articles_per_feed = 40 - no_stylesheets = True - - feeds = [('Heise Open', 'http://www.heise.de/open/news/news-atom.xml')] - - def print_version(self, url): - return url + '?view=print' - - remove_tags = [dict(id='navi_top'), - dict(id='navi_bottom'), - dict(name='div', attrs={'class': 'navi_top_logo'}), - dict(name='img', attrs={ - 'src': '/open/icons/open_logo_2009_weiss.gif'}), - dict(name='h5', attrs={'style': 'margin: 0.5em 0;'}), - dict(name='p', attrs={'class': 'news_datum'}), - dict(name='p', attrs={'class': 'size80'})] - remove_tags_after = [dict(name='p', attrs={'class': 'size80'})] - - def get_cover_url(self): - return 'http://www.heise.de/open/icons/open_logo_2009_weiss.gif' diff --git a/recipes/helsingin_sanomat.recipe b/recipes/helsingin_sanomat.recipe deleted file mode 100644 index cf50e6587e..0000000000 --- a/recipes/helsingin_sanomat.recipe +++ /dev/null @@ -1,34 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class AdvancedUserRecipe1298137661(BasicNewsRecipe): - title = u'Helsingin Sanomat' - __author__ = 'oneillpt' - language = 'fi' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_javascript = True - keep_only_tags = [ - classes('article-title single-article'), - ] - remove_tags = [ - dict(attrs={'class':['hidden print-url', 'article-paywall']}), - dict(style=lambda x: x and 'height: 0' in x), - ] - - feeds = [ - (u'Uutiset - HS.fi', u'https://www.hs.fi/uutiset/rss/'), - ] - - def preprocess_html(self, soup): - for tag in soup.findAll(attrs={'data-mfp-src':True}): - tag.name = 'img' - tag['src'] = tag['data-mfp-src'] - tag['style'] = 'display:block' - return soup diff --git a/recipes/hindu_human_rights.recipe b/recipes/hindu_human_rights.recipe deleted file mode 100644 index ce967842fe..0000000000 --- a/recipes/hindu_human_rights.recipe +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict( - attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class HinduHumanRights(BasicNewsRecipe): - title = 'Hindu Human Rights' - __author__ = 'Vishvas Vasuki' - language = 'en_IN' - oldest_article = 30 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [ - ('HHR main', 'https://www.hindusforhumanrights.org/en/home?format=rss'), - ] diff --git a/recipes/hnonline.recipe b/recipes/hnonline.recipe deleted file mode 100644 index 5a5f6ee5a9..0000000000 --- a/recipes/hnonline.recipe +++ /dev/null @@ -1,79 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class HNonlineRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'lacike' - language = 'sk' - version = 1 - - title = u'HNonline' - publisher = u'HNonline' - category = u'News, Newspaper' - description = u'News from Slovakia' - cover_url = u'http://hnonline.sk/img/sk/_relaunch/logo2.png' - - oldest_article = 1 - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - - no_stylesheets = True - remove_javascript = True - - # Feeds from: http://rss.hnonline.sk, for listing see - # http://rss.hnonline.sk/prehlad - feeds = [] - feeds.append((u'HNonline|Ekonomika a firmy', - u'http://rss.hnonline.sk/?p=kC1000')) - feeds.append((u'HNonline|Slovensko', u'http://rss.hnonline.sk/?p=kC2000')) - feeds.append((u'HNonline|Svet', u'http://rss.hnonline.sk/?p=kC3000')) - feeds.append((u'HNonline|\u0160port', u'http://rss.hnonline.sk/?p=kC4000')) - feeds.append((u'HNonline|Online rozhovor', - u'http://rss.hnonline.sk/?p=kCR000')) - - feeds.append((u'FinWeb|Spr\u00E1vy zo sveta financi\u00ED', - u'http://rss.finweb.hnonline.sk/spravodajstvo')) - feeds.append((u'FinWeb|Koment\u00E1re a anal\u00FDzy', - u'http://rss.finweb.hnonline.sk/?p=kPC200')) - feeds.append((u'FinWeb|Invest\u00EDcie', - u'http://rss.finweb.hnonline.sk/?p=kPC300')) - feeds.append((u'FinWeb|Svet akci\u00ED', - u'http://rss.finweb.hnonline.sk/?p=kPC400')) - feeds.append( - (u'FinWeb|Rozhovory', u'http://rss.finweb.hnonline.sk/?p=kPC500')) - feeds.append((u'FinWeb|T\u00E9ma t\u00FD\u017Ed\u0148a', - u'http://rss.finweb.hnonline.sk/?p=kPC600')) - feeds.append((u'FinWeb|Rebr\u00ED\u010Dky', - u'http://rss.finweb.hnonline.sk/?p=kPC700')) - - feeds.append((u'HNstyle|Kult\u00FAra', - u'http://style.hnonline.sk/?p=kTC100')) - feeds.append((u'HNstyle|Auto-moto', u'http://style.hnonline.sk/?p=kTC200')) - feeds.append((u'HNstyle|Digit\u00E1l', - u'http://style.hnonline.sk/?p=kTC300')) - feeds.append((u'HNstyle|Veda', u'http://style.hnonline.sk/?p=kTCV00')) - feeds.append((u'HNstyle|Dizajn', u'http://style.hnonline.sk/?p=kTC400')) - feeds.append( - (u'HNstyle|Cestovanie', u'http://style.hnonline.sk/?p=kTCc00')) - feeds.append( - (u'HNstyle|V\u00EDkend', u'http://style.hnonline.sk/?p=kTC800')) - feeds.append((u'HNstyle|Gastro', u'http://style.hnonline.sk/?p=kTC600')) - feeds.append((u'HNstyle|M\u00F3da', u'http://style.hnonline.sk/?p=kTC700')) - feeds.append((u'HNstyle|Modern\u00E1 \u017Eena', - u'http://style.hnonline.sk/?p=kTCA00')) - feeds.append((u'HNstyle|Pre\u010Do nie?!', - u'http://style.hnonline.sk/?p=k7C000')) - - keep_only_tags = [] - keep_only_tags.append(dict(name='h1', attrs={'class': 'detail-titulek'})) - keep_only_tags.append( - dict(name='div', attrs={'class': 'detail-podtitulek'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'detail-perex'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'detail-text'})) - - extra_css = ''' - @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)} - body {font-family: sans1, serif1;} - ''' diff --git a/recipes/hoersholmlokalavisen_dk.recipe b/recipes/hoersholmlokalavisen_dk.recipe deleted file mode 100644 index 476d2f0814..0000000000 --- a/recipes/hoersholmlokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Ugebladet -''' - - -class HoersholmLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Ugebladet' - description = 'Lokale, regionale nyheder, sport og kultur i Hørsholm, Rungsted, Fredensborg og Humlebæk på hoersholm.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Ugebladet', 'http://hoersholm.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/hola.recipe b/recipes/hola.recipe deleted file mode 100644 index dae6328cac..0000000000 --- a/recipes/hola.recipe +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8 -__license__ = 'GPL v3' -__copyright__ = '30 June 2012, desUBIKado' -__author__ = 'desUBIKado' -__description__ = 'Diario de actualidad, moda y belleza' -__version__ = 'v0.03' -__date__ = '28, Jul 2016' -''' -http://www.hola.com/ -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class hola_es(BasicNewsRecipe): - author = 'desUBIKado' - description = 'Diario de actualidad, moda y belleza' - title = u'¡Hola!' - publisher = 'Hola S.L.' - category = 'Spanish celebrities, Entertainment News, Royalty, Daily Variety, Hollywood' - language = 'es' - masthead_url = 'http://imagenes.hola.com/comunes/2008/logo-holacom.gif' - timefmt = '[%a, %d %b, %Y]' - oldest_article = 7 - delay = 1 - encoding = 'utf-8' - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - - feeds = [ - - (u'Famosos', u'http://www.hola.com/famosos/rss.xml'), - (u'Realeza', u'http://www.hola.com/realeza/rss.xml'), - (u'Cine', u'http://www.hola.com/cine/rss.xml'), - (u'M\xfasica', u'http://www.hola.com/musica/rss.xml'), - (u'Moda y modelos', u'http://www.hola.com/moda/portada/rss.xml'), - (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'), - (u'Ni\xf1os', u'http://www.hola.com/ninos/rss.xml') - ] - - keep_only_tags = [ - dict(name='article', attrs={'class': ['body col-md-8 col-xs-12']})] - - remove_tags = [dict(name='div', attrs={'class': ['comments', 'news-share', 'sponsored-news']}), - dict(name='div', attrs={'itemprop': ['logo']}), - dict(name='span', attrs={'class': ['hidden']}), - dict(name='p', attrs={'class': ['hidden']}), - dict(name='section', attrs={'class': ['news-tags']}) - ] - - remove_tags_after = dict(name='div', attrs={'class': 'comments'}) - - # VER GALERÍA - preprocess_regexps = [ - # Quitar VER GALERÍA - (re.compile(r'VER GALER', re.DOTALL | re.IGNORECASE), lambda m: ''), - # Quitar enlaces varios - (re.compile(r'

', - re.DOTALL | re.IGNORECASE), lambda m: '') - ] - - # Recuperamos la portada de papel (la imagen 520 tiene mayor resolucion) - # http://www.hola.com/imagenes/revista/3727/portada-revista-hola-520.jpg - def get_cover_url(self): - index = 'http://www.hola.com/abono/ediciondigital/' - soup = self.index_to_soup(index) - for image in soup.findAll('img', src=True): - if image['src'].endswith('portada-revista-hola-520.jpg'): - return 'http://www.hola.com' + image['src'] - return None - - def get_article_url(self, article): - url = article.get('guid', None) - return url - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;} - ''' diff --git a/recipes/hollywood_reporter.recipe b/recipes/hollywood_reporter.recipe deleted file mode 100644 index e7120267a7..0000000000 --- a/recipes/hollywood_reporter.recipe +++ /dev/null @@ -1,55 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2014, Darko Miletic ' -''' -www.hollywoodreporter.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class THR_En(BasicNewsRecipe): - title = 'The Hollywood Reporter' - __author__ = 'Darko Miletic' - description = 'Read about the latest in Hollywood and entertainment news from The Hollywood Reporter, your source for detailed movie reviews, celebrity styles, and industry blogs.' # noqa - publisher = 'The Hollywood Reporter' - category = 'Entertainment news, Hollywood news, celebrity news, latest Hollywood news' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - publication_type = 'newsportal' - auto_cleanup = True - auto_cleanup_keep = "//div[contains(concat(' ', normalize-space(@class), ' '), ' image ')]" - - extra_css = """ - body{font-family: Georgia,Times,serif} - h1,h2,h3{font-family: "Vonness-Bold-Compressed",Helvetica,sans-serif} - .credit,.caption{font-family: Arial,sans-serif;} - .credit,.caption,.submitted{font-size: small; color: gray;} - .main_media_credit{clear: left; font-size: x-small; text-align: right; color: gray;} - img{margin-top: 0.5em; margin-bottom: 0.4em; display:block} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [ - - (u'Movies', u'http://feeds.feedburner.com/thr/film'), - (u'TV', u'http://feeds.feedburner.com/thr/television'), - (u'Style&Culture', u'http://feeds.feedburner.com/thr/style'), - (u'International', u'http://feeds.feedburner.com/thr/international'), - (u'Music', u'http://feeds.feedburner.com/thr/music'), - (u'Tech', u'http://feeds.feedburner.com/TheHollywoodReporter-Technology'), - (u'Awards', u'http://feeds.feedburner.com/thr/awards'), - (u'Business', u'http://feeds.feedburner.com/thr/business'), - (u'Asia', u'http://feeds.feedburner.com/HollywoodReporterAsia'), - (u'Guilds and Labor', u'http://feeds.feedburner.com/thr/labor'), - (u'Box Office', u'http://feeds.feedburner.com/thr/boxoffice'), - (u'Real Estate', u'http://feeds.feedburner.com/thr/RealEstate'), - (u'Politics', u'http://feeds.feedburner.com/thr/politics') - ] diff --git a/recipes/hornsherredavis_dk.recipe b/recipes/hornsherredavis_dk.recipe deleted file mode 100644 index a60e5e6ac6..0000000000 --- a/recipes/hornsherredavis_dk.recipe +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Hornsherred Avis -''' - - -class Hornsherredavis_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Hornsherred Avis' - description = 'Lokale nyheder fra Jægerspis, Skibby og Bramsnæs' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 30 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - # Feed are found here: http://hornsherredavis.dk/ - feeds = [ - ('Hornsherred Avis', 'http://hornsherredavis.dk/?feed=rss2'), - ('Kommentarer til Hornsherred Avis', 'http://hornsherredavis.dk/?feed=comments-rss2'), - ] diff --git a/recipes/hornsherredlokalavisen_dk.recipe b/recipes/hornsherredlokalavisen_dk.recipe deleted file mode 100644 index b3d453bed2..0000000000 --- a/recipes/hornsherredlokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Hornsherred -''' - - -class HornsherredLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Hornsherred' - description = 'Lokale og regionale nyheder, sport og kultur fra Hornsherred og omegn på hornsherred.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Hornsherred', 'http://hornsherred.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/hotcity.recipe b/recipes/hotcity.recipe deleted file mode 100644 index b0ed801420..0000000000 --- a/recipes/hotcity.recipe +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -hotcity.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class HotcityRo(BasicNewsRecipe): - title = u'Hotcity' - __author__ = u'Silviu Cotoar\u0103' - description = u'Cultura urban\u0103 feminin\u0103' - publisher = 'Hotcity' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste' - encoding = 'utf-8' - cover_url = 'http://www.hotcity.ro/i/bg_header.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'articol_title'}), dict( - name='div', attrs={'class': 'text'}) - ] - - feeds = [ - (u'Feeds', u'http://www.hotcity.ro/rss') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/hotnews.recipe b/recipes/hotnews.recipe deleted file mode 100644 index 0f61ef3032..0000000000 --- a/recipes/hotnews.recipe +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -hotnews.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Hotnews(BasicNewsRecipe): - title = 'Hotnews' - __author__ = u'Silviu Cotoar\u0103' - description = u'\u0218tiri din Rom\u00e2nia' - publisher = 'Hotnews' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri,Romania' - encoding = 'utf-8' - cover_url = 'http://www.hotnews.ro/images/new/logo.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='h1', attrs={'class': 'title'}), dict( - name='div', attrs={'id': 'articleContent'}) - ] - - feeds = [(u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate'), (u'English', u'http://www.hotnews.ro/rss/english') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/house_news.recipe b/recipes/house_news.recipe deleted file mode 100644 index ae1f42b484..0000000000 --- a/recipes/house_news.recipe +++ /dev/null @@ -1,31 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2012-2015, Eddie Lau' -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipeHouseNews(BasicNewsRecipe): - title = u'The House News Bloggers 主場博客' - __author__ = 'Eddie Lau' - publisher = 'The House News Bloggers' - oldest_article = 1 - max_articles_per_feed = 100 - auto_cleanup = False - no_stylesheets = True - language = 'zh' - encoding = 'utf-8' - description = 'http://thehousenewsbloggers.net' - category = 'Chinese, Blogs, Opinion, News, Hong Kong' - masthead_url = 'http://thehousenewsbloggers.files.wordpress.com/2014/09/screen-shot-2014-09-11-at-8-55-13.png' - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}' # noqa - feeds = [(u'Latest', u'http://thehousenewsbloggers.net/feed/')] - keep_only_tags = [dict(name='h1', attrs={'class': ['title']}), - dict(name='span', attrs={'class': ['author vcard']}), - dict(name='time', attrs={'class': ['entry-date']}), - dict(name='section', attrs={'class': ['entry']})] - remove_tags = [dict(name='div', attrs={'id': ['jp-post-flair']})] - - def populate_article_metadata(self, article, soup, first): - if first and hasattr(self, 'add_toc_thumbnail'): - picdiv = soup.find('img') - if picdiv is not None: - self.add_toc_thumbnail(article, picdiv['src']) diff --git a/recipes/hrt.recipe b/recipes/hrt.recipe deleted file mode 100644 index 3042d074ce..0000000000 --- a/recipes/hrt.recipe +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' - -''' -www.hrt.hr -''' - -import re - -from calibre.ebooks.BeautifulSoup import Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class HRT(BasicNewsRecipe): - title = 'HRT: Vesti' - __author__ = 'Darko Miletic' - description = 'News from Croatia' - publisher = 'HRT' - category = 'news, politics, Croatia, HRT' - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'hr' - - lang = 'hr-HR' - extra_css = '''@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} - body{font-family: serif1, serif} .article_description{font-family: serif1, serif} - .news-single-timedata{color:#20558A; font-size:x-small;} - .nsTitle{color:#20558A; font-size:large; font-weight:bold;} - a{color:#20558A;} - .external-link-new-window{color:#20558A;} - - ''' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - keep_only_tags = [dict(name='div', attrs={'class': 'bigVijest'})] - - remove_tags = [dict(name=['object', 'link', 'embed'])] - - remove_tags_after = dict(name='div', attrs={'class': 'nsAuthor'}) - - feeds = [ - - (u'Vijesti', u'http://www.hrt.hr/?id=316&type=100&rss=vijesti'), - (u'Sport', u'http://www.hrt.hr/?id=316&type=100&rss=sport'), - (u'Zabava', u'http://www.hrt.hr/?id=316&type=100&rss=zabava'), - (u'Filmovi i serije', u'http://www.hrt.hr/?id=316&type=100&rss=filmovi'), - (u'Dokumentarni program', u'http://www.hrt.hr/?id=316&type=100&rss=dokumentarci'), - (u'Glazba', u'http://www.hrt.hr/?id=316&type=100&rss=glazba'), - (u'Kultura', u'http://www.hrt.hr/?id=316&type=100&rss=kultura'), - (u'Mladi', u'http://www.hrt.hr/?id=316&type=100&rss=mladi'), - (u'Manjine', u'http://www.hrt.hr/?id=316&type=100&rss=manjine'), - (u'Radio', u'http://www.hrt.hr/?id=316&type=100&rss=radio') - ] - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = new_tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=UTF-8")]) - soup.head.insert(0, mlang) - soup.head.insert(1, mcharset) - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) diff --git a/recipes/huffingtonpost.recipe b/recipes/huffingtonpost.recipe deleted file mode 100644 index e949dc4016..0000000000 --- a/recipes/huffingtonpost.recipe +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import print_function - -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class HuffingtonPostRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal and Archana Raman' - language = 'en' - version = 2 - - title = u'The Huffington Post' - publisher = u'huffingtonpost.com' - category = u'News, Politics' - description = u'Political Blog' - - oldest_article = 1.1 - max_articles_per_feed = 100 - - encoding = 'utf-8' - remove_empty_feeds = True - no_stylesheets = True - remove_javascript = True - - recipe_specific_options = { - 'days': { - 'short': 'Oldest article to download from this news source. In days ', - 'long': 'For example, 0.5, gives you articles from the past 12 hours', - 'default': str(oldest_article) - } - } - - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - d = self.recipe_specific_options.get('days') - if d and isinstance(d, str): - self.oldest_article = float(d) - - keep_only_tags = [ - classes('entry__header entry__body') - ] - remove_tags = [ - classes('app-download-interstitial share-bar top-media--video advertisement extra-content' - ' below-entry entry-inline-subscription-module related-articles') - ] - # Feeds from: http://www.huffingtonpost.com/syndication/ - feeds = [] - - feeds.append( - (u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml')) - - feeds.append( - (u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml')) - - feeds.append( - (u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml')) - - feeds.append( - (u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml')) - - feeds.append( - (u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml')) - - feeds.append( - (u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml')) - - feeds.append( - (u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml')) - - feeds.append( - (u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml')) - - feeds.append( - (u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml')) - - feeds.append( - (u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml')) - - feeds.append((u'Original Reporting', - u'http://www.huffingtonpost.com/tag/huffpolitics/feed')) - - extra_css = ''' - h1{font-family :Arial,Helvetica,sans-serif; font-size:large;} - h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;} - h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;} - body{font-family:verdana,arial,helvetica,geneva,sans-serif ;} - #title_permalink{color:black;font-size:large;} - .date{color:#858585;font-family:"Times New Roman",sans-serif;} - .comments_datetime v05{color:#696969;} - .teaser_permalink{font-style:italic;font-size:xx-small;} - .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;} - ''' -# a[href]{color: blue; text-decoration: none; cursor: pointer;} - - def get_article_url(self, article): - """ - Workaround for Feedparser behaviour. If an item has more than one element, article.link is empty and - article.links contains a list of dictionaries. - Todo: refactor to searching this list to avoid the hardcoded zero-index - """ - link = article.get('link') - print("Link:" + link) - if not link: - links = article.get('links') - if links: - link = links[0]['href'] - if not links[0]['href']: - link = links[1]['href'] - - return link - - def postprocess_html(self, soup, first_fetch): - for tag in soup.findAll('div', text="What's Your Reaction?"): - tag.extract() - - for tg in soup.findAll('blockquote'): - tg.extract() - - return soup diff --git a/recipes/hvidovreavis_dk.recipe b/recipes/hvidovreavis_dk.recipe deleted file mode 100644 index 2eed9d54d8..0000000000 --- a/recipes/hvidovreavis_dk.recipe +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Hvidovre_Avis_dk(BasicNewsRecipe): - title = 'Hvidovre avis' - language = 'da' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - feeds = [ - ('Lokale nyheder', 'http://hvidovre.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Sport', 'http://hvidovre.lokalavisen.dk/section/senestesportrss'), - ('112', 'http://hvidovre.lokalavisen.dk/section/seneste112rss'), - ('Kultur', 'http://hvidovre.lokalavisen.dk/section/senestekulturrss'), - ('Læserbreve', 'http://hvidovre.lokalavisen.dk/section/senestelaeserbreverss'), - ] diff --git a/recipes/hvidovrelokalavisen_dk.recipe b/recipes/hvidovrelokalavisen_dk.recipe deleted file mode 100644 index 21a1169442..0000000000 --- a/recipes/hvidovrelokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Hvidovre Avis -''' - - -class HvidovreLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Hvidovre Avis' - description = 'Lokale og regionale nyheder, sport og kultur fra Hvidovre på hvidovre.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Hvidovre Avis', 'http://hvidovre.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Hvidovre Avis', 'http://hvidovre.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Hvidovre Avis', 'http://hvidovre.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Hvidovre Avis', 'http://hvidovre.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Hvidovre Avis', 'http://hvidovre.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Hvidovre Avis', 'http://hvidovre.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/ibm_smarter_planet.recipe b/recipes/ibm_smarter_planet.recipe deleted file mode 100644 index 77cd3fec57..0000000000 --- a/recipes/ibm_smarter_planet.recipe +++ /dev/null @@ -1,25 +0,0 @@ - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1293122276(BasicNewsRecipe): - title = u'Smarter Planet | Tumblr' - __author__ = 'Jack Mason' - author = 'IBM Global Business Services' - publisher = 'IBM' - language = 'en' - category = 'news, technology, IT, internet of things, analytics' - oldest_article = 14 - max_articles_per_feed = 30 - no_stylesheets = True - use_embedded_content = False - masthead_url = 'http://www.hellercd.com/wp-content/uploads/2010/09/hero.jpg' - remove_tags_before = dict(id='item') - remove_tags_after = dict(id='item') - remove_tags = [dict(attrs={'class': ['sidebar', 'about', 'footer', 'description,' 'disqus', 'nav', 'notes', 'disqus_thread']}), - dict(id=['sidebar', 'footer', 'disqus', 'nav', 'notes', - 'likes_container', 'description', 'disqus_thread', 'about']), - dict(name=['script', 'noscript', 'style'])] - - feeds = [(u'Smarter Planet Tumblr', - u'http://smarterplanet.tumblr.com/mobile/rss')] diff --git a/recipes/icons/f1_ultra.png b/recipes/icons/f1_ultra.png deleted file mode 100644 index 45565f2788..0000000000 Binary files a/recipes/icons/f1_ultra.png and /dev/null differ diff --git a/recipes/icons/f_secure.png b/recipes/icons/f_secure.png deleted file mode 100644 index a36e72ce3d..0000000000 Binary files a/recipes/icons/f_secure.png and /dev/null differ diff --git a/recipes/icons/favrskovavisen_dk.png b/recipes/icons/favrskovavisen_dk.png deleted file mode 100644 index 7ade20160b..0000000000 Binary files a/recipes/icons/favrskovavisen_dk.png and /dev/null differ diff --git a/recipes/icons/favrskovlokalavisen_dk.png b/recipes/icons/favrskovlokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/favrskovlokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/faznet.png b/recipes/icons/faznet.png deleted file mode 100644 index 8f6e9b2dad..0000000000 Binary files a/recipes/icons/faznet.png and /dev/null differ diff --git a/recipes/icons/fc_knudde.png b/recipes/icons/fc_knudde.png deleted file mode 100644 index 8253e6eb27..0000000000 Binary files a/recipes/icons/fc_knudde.png and /dev/null differ diff --git a/recipes/icons/fdb_pl.png b/recipes/icons/fdb_pl.png deleted file mode 100644 index a6bb59e9bb..0000000000 Binary files a/recipes/icons/fdb_pl.png and /dev/null differ diff --git a/recipes/icons/fe_india.png b/recipes/icons/fe_india.png deleted file mode 100644 index f07c735fec..0000000000 Binary files a/recipes/icons/fe_india.png and /dev/null differ diff --git a/recipes/icons/felicia.png b/recipes/icons/felicia.png deleted file mode 100644 index 9e29f52816..0000000000 Binary files a/recipes/icons/felicia.png and /dev/null differ diff --git a/recipes/icons/fhm_uk.png b/recipes/icons/fhm_uk.png deleted file mode 100644 index d62dcedf26..0000000000 Binary files a/recipes/icons/fhm_uk.png and /dev/null differ diff --git a/recipes/icons/fhmro.png b/recipes/icons/fhmro.png deleted file mode 100644 index d95c1fd06c..0000000000 Binary files a/recipes/icons/fhmro.png and /dev/null differ diff --git a/recipes/icons/fifty_two.png b/recipes/icons/fifty_two.png deleted file mode 100644 index 48923e874d..0000000000 Binary files a/recipes/icons/fifty_two.png and /dev/null differ diff --git a/recipes/icons/fisco_oggi.png b/recipes/icons/fisco_oggi.png deleted file mode 100644 index 50764afdde..0000000000 Binary files a/recipes/icons/fisco_oggi.png and /dev/null differ diff --git a/recipes/icons/fleshbot.png b/recipes/icons/fleshbot.png deleted file mode 100644 index 024afd514f..0000000000 Binary files a/recipes/icons/fleshbot.png and /dev/null differ diff --git a/recipes/icons/folkebladet_dk.png b/recipes/icons/folkebladet_dk.png deleted file mode 100644 index 1bae425939..0000000000 Binary files a/recipes/icons/folkebladet_dk.png and /dev/null differ diff --git a/recipes/icons/folkebladetdjursland_dk.png b/recipes/icons/folkebladetdjursland_dk.png deleted file mode 100644 index 7ade20160b..0000000000 Binary files a/recipes/icons/folkebladetdjursland_dk.png and /dev/null differ diff --git a/recipes/icons/folketidende_dk.png b/recipes/icons/folketidende_dk.png deleted file mode 100644 index e85e348b6a..0000000000 Binary files a/recipes/icons/folketidende_dk.png and /dev/null differ diff --git a/recipes/icons/forbes.png b/recipes/icons/forbes.png deleted file mode 100644 index 80ac1805c7..0000000000 Binary files a/recipes/icons/forbes.png and /dev/null differ diff --git a/recipes/icons/forbes_india.png b/recipes/icons/forbes_india.png deleted file mode 100644 index 49b784af16..0000000000 Binary files a/recipes/icons/forbes_india.png and /dev/null differ diff --git a/recipes/icons/forbes_pl.png b/recipes/icons/forbes_pl.png deleted file mode 100644 index 2d3c051e01..0000000000 Binary files a/recipes/icons/forbes_pl.png and /dev/null differ diff --git a/recipes/icons/formulaas.png b/recipes/icons/formulaas.png deleted file mode 100644 index 5f2a69384c..0000000000 Binary files a/recipes/icons/formulaas.png and /dev/null differ diff --git a/recipes/icons/forsal.png b/recipes/icons/forsal.png deleted file mode 100644 index 3ab23b9994..0000000000 Binary files a/recipes/icons/forsal.png and /dev/null differ diff --git a/recipes/icons/fotoblogia_pl.png b/recipes/icons/fotoblogia_pl.png deleted file mode 100644 index fcd86454bc..0000000000 Binary files a/recipes/icons/fotoblogia_pl.png and /dev/null differ diff --git a/recipes/icons/fr_online.png b/recipes/icons/fr_online.png deleted file mode 100644 index 6752828ad5..0000000000 Binary files a/recipes/icons/fr_online.png and /dev/null differ diff --git a/recipes/icons/frankfurter_rundschau.png b/recipes/icons/frankfurter_rundschau.png deleted file mode 100644 index 6752828ad5..0000000000 Binary files a/recipes/icons/frankfurter_rundschau.png and /dev/null differ diff --git a/recipes/icons/freakonomics.png b/recipes/icons/freakonomics.png deleted file mode 100644 index 16f623a725..0000000000 Binary files a/recipes/icons/freakonomics.png and /dev/null differ diff --git a/recipes/icons/fredensborglokalavisen_dk.png b/recipes/icons/fredensborglokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/fredensborglokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/fredericialokalavisen_dk.png b/recipes/icons/fredericialokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/fredericialokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/frederiksbergbladet_dk.png b/recipes/icons/frederiksbergbladet_dk.png deleted file mode 100644 index 6fc915ca3d..0000000000 Binary files a/recipes/icons/frederiksbergbladet_dk.png and /dev/null differ diff --git a/recipes/icons/frederikssundlokalavisen_dk.png b/recipes/icons/frederikssundlokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/frederikssundlokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/furesoelokalavisen_dk.png b/recipes/icons/furesoelokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/furesoelokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/gamasutra_fa.png b/recipes/icons/gamasutra_fa.png deleted file mode 100644 index 7d24b85981..0000000000 Binary files a/recipes/icons/gamasutra_fa.png and /dev/null differ diff --git a/recipes/icons/gamasutra_news.png b/recipes/icons/gamasutra_news.png deleted file mode 100644 index 7d24b85981..0000000000 Binary files a/recipes/icons/gamasutra_news.png and /dev/null differ diff --git a/recipes/icons/gamespot.png b/recipes/icons/gamespot.png deleted file mode 100644 index 37eb696399..0000000000 Binary files a/recipes/icons/gamespot.png and /dev/null differ diff --git a/recipes/icons/gandul.png b/recipes/icons/gandul.png deleted file mode 100644 index b4cb8ba2c5..0000000000 Binary files a/recipes/icons/gandul.png and /dev/null differ diff --git a/recipes/icons/gazeta_lubuska.png b/recipes/icons/gazeta_lubuska.png deleted file mode 100644 index 50c46dc980..0000000000 Binary files a/recipes/icons/gazeta_lubuska.png and /dev/null differ diff --git a/recipes/icons/gazeta_pl_bydgoszcz.png b/recipes/icons/gazeta_pl_bydgoszcz.png deleted file mode 100644 index b231f7d696..0000000000 Binary files a/recipes/icons/gazeta_pl_bydgoszcz.png and /dev/null differ diff --git a/recipes/icons/gazeta_pl_szczecin.png b/recipes/icons/gazeta_pl_szczecin.png deleted file mode 100644 index b231f7d696..0000000000 Binary files a/recipes/icons/gazeta_pl_szczecin.png and /dev/null differ diff --git a/recipes/icons/gazeta_pomorska.png b/recipes/icons/gazeta_pomorska.png deleted file mode 100644 index c543b088e5..0000000000 Binary files a/recipes/icons/gazeta_pomorska.png and /dev/null differ diff --git a/recipes/icons/gazeta_wroclawska.png b/recipes/icons/gazeta_wroclawska.png deleted file mode 100644 index 4af0ba2452..0000000000 Binary files a/recipes/icons/gazeta_wroclawska.png and /dev/null differ diff --git a/recipes/icons/gazeta_wspolczesna.png b/recipes/icons/gazeta_wspolczesna.png deleted file mode 100644 index 31a38235f7..0000000000 Binary files a/recipes/icons/gazeta_wspolczesna.png and /dev/null differ diff --git a/recipes/icons/gazeta_wyborcza.png b/recipes/icons/gazeta_wyborcza.png deleted file mode 100644 index b231f7d696..0000000000 Binary files a/recipes/icons/gazeta_wyborcza.png and /dev/null differ diff --git a/recipes/icons/gcn.png b/recipes/icons/gcn.png deleted file mode 100644 index e5bdfe87e8..0000000000 Binary files a/recipes/icons/gcn.png and /dev/null differ diff --git a/recipes/icons/geek_poke.png b/recipes/icons/geek_poke.png deleted file mode 100644 index 8d4b1dca9a..0000000000 Binary files a/recipes/icons/geek_poke.png and /dev/null differ diff --git a/recipes/icons/gentoftelokalavisen_dk.png b/recipes/icons/gentoftelokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/gentoftelokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/german_gov.png b/recipes/icons/german_gov.png deleted file mode 100644 index 0b2245ca4a..0000000000 Binary files a/recipes/icons/german_gov.png and /dev/null differ diff --git a/recipes/icons/gezgin_dergi.png b/recipes/icons/gezgin_dergi.png deleted file mode 100644 index 4688fc237f..0000000000 Binary files a/recipes/icons/gezgin_dergi.png and /dev/null differ diff --git a/recipes/icons/gildia_pl.png b/recipes/icons/gildia_pl.png deleted file mode 100644 index 82d334035e..0000000000 Binary files a/recipes/icons/gildia_pl.png and /dev/null differ diff --git a/recipes/icons/gizmodo.png b/recipes/icons/gizmodo.png deleted file mode 100644 index 02ba202d47..0000000000 Binary files a/recipes/icons/gizmodo.png and /dev/null differ diff --git a/recipes/icons/glamour.png b/recipes/icons/glamour.png deleted file mode 100644 index 130d477afd..0000000000 Binary files a/recipes/icons/glamour.png and /dev/null differ diff --git a/recipes/icons/glennbeck.png b/recipes/icons/glennbeck.png deleted file mode 100644 index 75cafd494e..0000000000 Binary files a/recipes/icons/glennbeck.png and /dev/null differ diff --git a/recipes/icons/glos_wielkopolski.png b/recipes/icons/glos_wielkopolski.png deleted file mode 100644 index 504b3e80eb..0000000000 Binary files a/recipes/icons/glos_wielkopolski.png and /dev/null differ diff --git a/recipes/icons/go4it.png b/recipes/icons/go4it.png deleted file mode 100644 index b7e529a95c..0000000000 Binary files a/recipes/icons/go4it.png and /dev/null differ diff --git a/recipes/icons/goal.png b/recipes/icons/goal.png deleted file mode 100644 index 3d828d0838..0000000000 Binary files a/recipes/icons/goal.png and /dev/null differ diff --git a/recipes/icons/gofin_pl.png b/recipes/icons/gofin_pl.png deleted file mode 100644 index 6efbb2a7af..0000000000 Binary files a/recipes/icons/gofin_pl.png and /dev/null differ diff --git a/recipes/icons/good_to_know.png b/recipes/icons/good_to_know.png deleted file mode 100644 index 256f3647d9..0000000000 Binary files a/recipes/icons/good_to_know.png and /dev/null differ diff --git a/recipes/icons/googlemobileblog.png b/recipes/icons/googlemobileblog.png deleted file mode 100644 index af6a32434e..0000000000 Binary files a/recipes/icons/googlemobileblog.png and /dev/null differ diff --git a/recipes/icons/grenaalokalavisen_dk.png b/recipes/icons/grenaalokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/grenaalokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/gribskovlokalavisen_dk.png b/recipes/icons/gribskovlokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/gribskovlokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/grrm.png b/recipes/icons/grrm.png deleted file mode 100644 index 819b87e8c2..0000000000 Binary files a/recipes/icons/grrm.png and /dev/null differ diff --git a/recipes/icons/gs24_pl.png b/recipes/icons/gs24_pl.png deleted file mode 100644 index 0be176e730..0000000000 Binary files a/recipes/icons/gs24_pl.png and /dev/null differ diff --git a/recipes/icons/gulfnews.png b/recipes/icons/gulfnews.png deleted file mode 100644 index 8e4bbc3c4f..0000000000 Binary files a/recipes/icons/gulfnews.png and /dev/null differ diff --git a/recipes/icons/haderslevlokalavisen_dk.png b/recipes/icons/haderslevlokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/haderslevlokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/hankyoreh.png b/recipes/icons/hankyoreh.png deleted file mode 100644 index 5f8721276b..0000000000 Binary files a/recipes/icons/hankyoreh.png and /dev/null differ diff --git a/recipes/icons/heavy_metal_it.png b/recipes/icons/heavy_metal_it.png deleted file mode 100644 index 46529cb072..0000000000 Binary files a/recipes/icons/heavy_metal_it.png and /dev/null differ diff --git a/recipes/icons/heise_open.png b/recipes/icons/heise_open.png deleted file mode 100644 index 27418ca722..0000000000 Binary files a/recipes/icons/heise_open.png and /dev/null differ diff --git a/recipes/icons/helsingin_sanomat.png b/recipes/icons/helsingin_sanomat.png deleted file mode 100644 index 97ad6a8755..0000000000 Binary files a/recipes/icons/helsingin_sanomat.png and /dev/null differ diff --git a/recipes/icons/hindu_human_rights.png b/recipes/icons/hindu_human_rights.png deleted file mode 100644 index bc8088750e..0000000000 Binary files a/recipes/icons/hindu_human_rights.png and /dev/null differ diff --git a/recipes/icons/hnonline.png b/recipes/icons/hnonline.png deleted file mode 100644 index b0df23d5f1..0000000000 Binary files a/recipes/icons/hnonline.png and /dev/null differ diff --git a/recipes/icons/hoersholmlokalavisen_dk.png b/recipes/icons/hoersholmlokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/hoersholmlokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/hola.png b/recipes/icons/hola.png deleted file mode 100644 index 7e806e2cb8..0000000000 Binary files a/recipes/icons/hola.png and /dev/null differ diff --git a/recipes/icons/hollywood_reporter.png b/recipes/icons/hollywood_reporter.png deleted file mode 100644 index 91e98e3fac..0000000000 Binary files a/recipes/icons/hollywood_reporter.png and /dev/null differ diff --git a/recipes/icons/hornsherredlokalavisen_dk.png b/recipes/icons/hornsherredlokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/hornsherredlokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/hotcity.png b/recipes/icons/hotcity.png deleted file mode 100644 index 3b7039012d..0000000000 Binary files a/recipes/icons/hotcity.png and /dev/null differ diff --git a/recipes/icons/hotnews.png b/recipes/icons/hotnews.png deleted file mode 100644 index 9e4deeb436..0000000000 Binary files a/recipes/icons/hotnews.png and /dev/null differ diff --git a/recipes/icons/house_news.png b/recipes/icons/house_news.png deleted file mode 100644 index 4b1c1998ad..0000000000 Binary files a/recipes/icons/house_news.png and /dev/null differ diff --git a/recipes/icons/hrt.png b/recipes/icons/hrt.png deleted file mode 100644 index f7f52dc87d..0000000000 Binary files a/recipes/icons/hrt.png and /dev/null differ diff --git a/recipes/icons/huffingtonpost.png b/recipes/icons/huffingtonpost.png deleted file mode 100644 index 0bd11bd96f..0000000000 Binary files a/recipes/icons/huffingtonpost.png and /dev/null differ diff --git a/recipes/icons/huffingtonpost_uk.png b/recipes/icons/huffingtonpost_uk.png deleted file mode 100644 index 392cf8b710..0000000000 Binary files a/recipes/icons/huffingtonpost_uk.png and /dev/null differ diff --git a/recipes/icons/hvidovreavis_dk.png b/recipes/icons/hvidovreavis_dk.png deleted file mode 100644 index 253d59b258..0000000000 Binary files a/recipes/icons/hvidovreavis_dk.png and /dev/null differ diff --git a/recipes/icons/hvidovrelokalavisen_dk.png b/recipes/icons/hvidovrelokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/hvidovrelokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/ideal_almeria.png b/recipes/icons/ideal_almeria.png deleted file mode 100644 index 022305f78b..0000000000 Binary files a/recipes/icons/ideal_almeria.png and /dev/null differ diff --git a/recipes/icons/ideal_granada.png b/recipes/icons/ideal_granada.png deleted file mode 100644 index 022305f78b..0000000000 Binary files a/recipes/icons/ideal_granada.png and /dev/null differ diff --git a/recipes/icons/ideal_jaen.png b/recipes/icons/ideal_jaen.png deleted file mode 100644 index 022305f78b..0000000000 Binary files a/recipes/icons/ideal_jaen.png and /dev/null differ diff --git a/recipes/icons/idg_se.png b/recipes/icons/idg_se.png deleted file mode 100644 index f914292957..0000000000 Binary files a/recipes/icons/idg_se.png and /dev/null differ diff --git a/recipes/icons/iekspries.png b/recipes/icons/iekspries.png deleted file mode 100644 index f0c543dcf7..0000000000 Binary files a/recipes/icons/iekspries.png and /dev/null differ diff --git a/recipes/icons/ihned.cz.png b/recipes/icons/ihned.cz.png deleted file mode 100644 index 0cf00abae8..0000000000 Binary files a/recipes/icons/ihned.cz.png and /dev/null differ diff --git a/recipes/icons/ihned.png b/recipes/icons/ihned.png deleted file mode 100644 index 0cf00abae8..0000000000 Binary files a/recipes/icons/ihned.png and /dev/null differ diff --git a/recipes/icons/il_foglio.png b/recipes/icons/il_foglio.png deleted file mode 100644 index ba530ffb07..0000000000 Binary files a/recipes/icons/il_foglio.png and /dev/null differ diff --git a/recipes/icons/iliteratura_cz.png b/recipes/icons/iliteratura_cz.png deleted file mode 100644 index 9b7784fe67..0000000000 Binary files a/recipes/icons/iliteratura_cz.png and /dev/null differ diff --git a/recipes/icons/indy_star.png b/recipes/icons/indy_star.png deleted file mode 100644 index 59769c64c6..0000000000 Binary files a/recipes/icons/indy_star.png and /dev/null differ diff --git a/recipes/icons/infomotori.png b/recipes/icons/infomotori.png deleted file mode 100644 index 2146ac4e6b..0000000000 Binary files a/recipes/icons/infomotori.png and /dev/null differ diff --git a/recipes/icons/infoworld.png b/recipes/icons/infoworld.png deleted file mode 100644 index 6293b70f91..0000000000 Binary files a/recipes/icons/infoworld.png and /dev/null differ diff --git a/recipes/icons/infra_pl.png b/recipes/icons/infra_pl.png deleted file mode 100644 index f2ff580e44..0000000000 Binary files a/recipes/icons/infra_pl.png and /dev/null differ diff --git a/recipes/icons/inopressa.png b/recipes/icons/inopressa.png deleted file mode 100644 index e5c7259e56..0000000000 Binary files a/recipes/icons/inopressa.png and /dev/null differ diff --git a/recipes/icons/insider.png b/recipes/icons/insider.png deleted file mode 100644 index b791c89e18..0000000000 Binary files a/recipes/icons/insider.png and /dev/null differ diff --git a/recipes/icons/io9.png b/recipes/icons/io9.png deleted file mode 100644 index 19208caf56..0000000000 Binary files a/recipes/icons/io9.png and /dev/null differ diff --git a/recipes/icons/ionline_pt.png b/recipes/icons/ionline_pt.png deleted file mode 100644 index b4a933297f..0000000000 Binary files a/recipes/icons/ionline_pt.png and /dev/null differ diff --git a/recipes/icons/ivanamilakovic.png b/recipes/icons/ivanamilakovic.png deleted file mode 100644 index 293ea03423..0000000000 Binary files a/recipes/icons/ivanamilakovic.png and /dev/null differ diff --git a/recipes/ideal_almeria.recipe b/recipes/ideal_almeria.recipe deleted file mode 100644 index 6d8c551d3e..0000000000 --- a/recipes/ideal_almeria.recipe +++ /dev/null @@ -1,66 +0,0 @@ -# encoding: utf-8 -*- - -__license__ = 'GPL v3' -__author__ = 'Josemi Liébana ' -__copyright__ = 'Josemi Liébana' -__version__ = 'v0.1' -__date__ = '5 January 2012' - - -''' -www.ideal.es -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Ideal(BasicNewsRecipe): - title = u'Ideal (Edición Almería)' - __author__ = u'Josemi Liébana' - description = u'Noticias de Almería y el resto del mundo' - publisher = 'Ideal' - category = u'News, Politics, Spain, Almería' - publication_type = 'Newspaper' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'es' - remove_empty_feeds = True - masthead_url = u'http://www.ideal.es/img/rd.logotipo2_ideal.gif' - cover_url = u'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG' - extra_css = u' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(attrs={'id': 'title'}), dict(attrs={'class': [ - 'overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}) - ] - - remove_tags = [dict(name='ul')] - - remove_attributes = ['width', 'height'] - - feeds = [ - - (u'Última Hora', u'http://www.ideal.es/almeria/rss/feeds/ultima.xml'), - (u'Portada', u'http://www.ideal.es/almeria/portada.xml'), - (u'Local', u'http://www.ideal.es/almeria/rss/feeds/granada.xml'), - (u'Deportes', u'http://www.ideal.es/almeria/rss/feeds/deportes.xml'), - (u'Sociedad', u'http://www.ideal.es/almeria/rss/feeds/sociedad.xml'), - (u'Cultura', u'http://www.ideal.es/almeria/rss/feeds/cultura.xml'), - (u'Economía', u'http://www.ideal.es/almeria/rss/feeds/economia.xml'), - (u'Costa', u'http://www.ideal.es/almeria/rss/feeds/costa.xml'), - (u'Puerta Purchena', u'http://www.ideal.es/almeria/rss/feeds/puerta_purchena.xml'), - (u'Andalucía', u'http://www.ideal.es/almeria/rss/feeds/andalucia.xml'), - (u'España', u'http://www.ideal.es/almeria/rss/feeds/espana.xml'), - (u'Mundo', u'http://www.ideal.es/almeria/rss/feeds/internacional.xml'), - (u'Vivir', u'http://www.ideal.es/almeria/rss/feeds/vivir.xml'), - (u'Opinión', u'http://www.ideal.es/almeria/rss/feeds/opinion.xml'), - (u'Televisión', u'http://www.ideal.es/almeria/rss/feeds/television.xml'), - (u'Contraportada', u'http://www.ideal.es/almeria/rss/feeds/contraportada.xml') - ] diff --git a/recipes/ideal_granada.recipe b/recipes/ideal_granada.recipe deleted file mode 100644 index 4d0669f0ab..0000000000 --- a/recipes/ideal_granada.recipe +++ /dev/null @@ -1,67 +0,0 @@ -# encoding: utf-8 -*- - -__license__ = 'GPL v3' -__author__ = 'Josemi Liébana ' -__copyright__ = 'Josemi Liébana' -__version__ = 'v0.1' -__date__ = '5 January 2012' - - -''' -www.ideal.es -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Ideal(BasicNewsRecipe): - title = u'Ideal (Edición Granada)' - __author__ = u'Josemi Liébana' - description = u'Noticias de Granada y el resto del mundo' - publisher = 'Ideal' - category = 'News, Politics, Spain, Granada' - publication_type = 'Newspaper' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'es' - remove_empty_feeds = True - masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif' - cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG' - extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(attrs={'id': 'title'}), dict(attrs={'class': [ - 'overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}) - ] - - remove_tags = [dict(name='ul')] - - remove_attributes = ['width', 'height'] - - feeds = [ - - (u'Última Hora', u'http://www.ideal.es/granada/rss/feeds/ultima.xml'), - (u'Portada', u'http://www.ideal.es/granada/portada.xml'), - (u'Local', u'http://www.ideal.es/granada/rss/feeds/granada.xml'), - (u'Deportes', u'http://www.ideal.es/granada/rss/feeds/deportes.xml'), - (u'Sociedad', u'http://www.ideal.es/granada/rss/feeds/sociedad.xml'), - (u'Cultura', u'http://www.ideal.es/granada/rss/feeds/cultura.xml'), - (u'Economía', u'http://www.ideal.es/granada/rss/feeds/economia.xml'), - (u'Costa', u'http://www.ideal.es/granada/rss/feeds/costa.xml'), - (u'La Carrera', u'http://www.ideal.es/granada/rss/feeds/la_carrera.xml'), - (u'Puerta Real', u'http://www.ideal.es/granada/rss/feeds/puerta_real.xml'), - (u'Andalucía', u'http://www.ideal.es/granada/rss/feeds/andalucia.xml'), - (u'España', u'http://www.ideal.es/granada/rss/feeds/espana.xml'), - (u'Mundo', u'http://www.ideal.es/granada/rss/feeds/internacional.xml'), - (u'Vivir', u'http://www.ideal.es/granada/rss/feeds/vivir.xml'), - (u'Opinión', u'http://www.ideal.es/granada/rss/feeds/opinion.xml'), - (u'Televisión', u'http://www.ideal.es/granada/rss/feeds/television.xml'), - (u'Contraportada', u'http://www.ideal.es/granada/rss/feeds/contraportada.xml') - ] diff --git a/recipes/ideal_jaen.recipe b/recipes/ideal_jaen.recipe deleted file mode 100644 index 9509836592..0000000000 --- a/recipes/ideal_jaen.recipe +++ /dev/null @@ -1,65 +0,0 @@ -# encoding: utf-8 -*- - -__license__ = 'GPL v3' -__author__ = 'Josemi Liébana ' -__copyright__ = 'Josemi Liébana' -__version__ = 'v0.1' -__date__ = '5 January 2012' - - -''' -www.ideal.es -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Ideal(BasicNewsRecipe): - title = u'Ideal (Edición Jaén)' - __author__ = u'Josemi Liébana' - description = u'Noticias de Jaén y el resto del mundo' - publisher = 'Ideal' - category = u'News, Politics, Spain, Jaén' - publication_type = 'Newspaper' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'es' - remove_empty_feeds = True - masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif' - cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG' - extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(attrs={'id': 'title'}), dict(attrs={'class': [ - 'overhead', 'headline', 'subhead', 'date', 'text', 'noticia_cont', 'desarrollo']}) - ] - - remove_tags = [dict(name='ul')] - - remove_attributes = ['width', 'height'] - - feeds = [ - - (u'Última Hora', u'http://www.ideal.es/jaen/rss/feeds/ultima.xml'), - (u'Portada', u'http://www.ideal.es/jaen/portada.xml'), - (u'Local', u'http://www.ideal.es/jaen/rss/feeds/granada.xml'), - (u'Deportes', u'http://www.ideal.es/jaen/rss/feeds/deportes.xml'), - (u'Sociedad', u'http://www.ideal.es/jaen/rss/feeds/sociedad.xml'), - (u'Cultura', u'http://www.ideal.es/jaen/rss/feeds/cultura.xml'), - (u'Economía', u'http://www.ideal.es/jaen/rss/feeds/economia.xml'), - (u'Costa', u'http://www.ideal.es/jaen/rss/feeds/costa.xml'), - (u'Andalucía', u'http://www.ideal.es/jaen/rss/feeds/andalucia.xml'), - (u'España', u'http://www.ideal.es/jaen/rss/feeds/espana.xml'), - (u'Mundo', u'http://www.ideal.es/jaen/rss/feeds/internacional.xml'), - (u'Vivir', u'http://www.ideal.es/jaen/rss/feeds/vivir.xml'), - (u'Opinión', u'http://www.ideal.es/jaen/rss/feeds/opinion.xml'), - (u'Televisión', u'http://www.ideal.es/jaen/rss/feeds/television.xml'), - (u'Contraportada', u'http://www.ideal.es/jaen/rss/feeds/contraportada.xml') - ] diff --git a/recipes/idg_se.recipe b/recipes/idg_se.recipe deleted file mode 100644 index fee66ab9c3..0000000000 --- a/recipes/idg_se.recipe +++ /dev/null @@ -1,36 +0,0 @@ -__license__ = 'GPLv3' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class IDGse(BasicNewsRecipe): - title = 'IDG' - __author__ = 'Stanislav Khromov' - language = 'sv' - description = 'IDG.se' - oldest_article = 1 - max_articles_per_feed = 256 - no_stylesheets = True - encoding = 'utf-8' - remove_javascript = True - - feeds = [(u'Dagens IDG-nyheter', u'http://feeds.idg.se/idg/ETkj?format=xml')] - - def get_article_url(self, article): - return article.get('guid', None) - - def print_version(self, url): - return url + '?articleRenderMode=print&m=print' - - def get_cover_url(this): - return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg' - - keep_only_tags = [ - dict(name='h1'), - dict(name='div', attrs={'class': ['divColumn1Article']}), - ] - # remove ads - remove_tags = [ - dict(name='div', attrs={'id': ['preamble_ad']}), - dict(name='ul', attrs={'class': ['share']}) - ] diff --git a/recipes/iekspries.recipe b/recipes/iekspries.recipe deleted file mode 100644 index bede039bd3..0000000000 --- a/recipes/iekspries.recipe +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1434052728(BasicNewsRecipe): - title = '\u0415\u043a\u0441\u043f\u0440\u0435\u0441' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'rpalyvoda' - language = 'uk' - cover_url = 'https://upload.wikimedia.org/wikipedia/uk/7/7b/Expres-logo.png' - masthead_url = 'https://upload.wikimedia.org/wikipedia/uk/7/7b/Expres-logo.png' - auto_cleanup = True - - feeds = [ - ('\u0413\u043e\u043b\u043e\u0432\u043d\u0456 \u043d\u043e\u0432\u0438\u043d\u0438', - 'http://expres.ua/rss.xml'), - ('\u041b\u044c\u0432\u0456\u0432\u0441\u044c\u043a\u0456 \u043d\u043e\u0432\u0438\u043d\u0438', - 'http://lvivexpres.com/rss.xml'), - ] diff --git a/recipes/ihned.cz.recipe b/recipes/ihned.cz.recipe deleted file mode 100644 index 29cb41c2e2..0000000000 --- a/recipes/ihned.cz.recipe +++ /dev/null @@ -1,36 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import unicode_literals - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class ihnedRecipe(BasicNewsRecipe): - __author__ = 'bubak' - title = u'iHNed.cz' - publisher = u'' - description = 'ihned.cz' - oldest_article = 1 - max_articles_per_feed = 20 - use_embedded_content = False - - feeds = [ - (u'Zprávy', u'http://zpravy.ihned.cz/?m=rss'), - (u'Hospodářské noviny', u'http://hn.ihned.cz/?p=500000_rss'), - (u'Byznys', u'http://byznys.ihned.cz/?m=rss'), - (u'Life', u'http://life.ihned.cz/?m=rss'), - (u'Dialog', u'http://dialog.ihned.cz/?m=rss') - ] - - language = 'cs' - cover_url = 'http://rss.ihned.cz/img/0/0_hp09/ihned.cz.gif' - remove_javascript = True - no_stylesheets = True - extra_css = """ - """ - - remove_attributes = [] - remove_tags_before = dict(name='div', attrs={'id': ['heading']}) - remove_tags_after = dict(name='div', attrs={'id': ['next-authors']}) - remove_tags = [dict(name='ul', attrs={'id': ['comm']}), - dict(name='div', attrs={'id': ['r-big']}), - dict(name='div', attrs={'class': ['tools tools-top']})] diff --git a/recipes/ihned.recipe b/recipes/ihned.recipe deleted file mode 100644 index 1d4b8993f6..0000000000 --- a/recipes/ihned.recipe +++ /dev/null @@ -1,175 +0,0 @@ -import re -import time - -from calibre import strftime -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class IHNed(BasicNewsRecipe): - - stahnout_vsechny = True - # True = stahuje vsechny z homepage - # False = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten) - - title = 'iHNed' - __author__ = 'Karel Bílek' - language = 'cs' - description = 'Zprávy z iHNed.cz' - timefmt = ' [%a, %d %b, %Y]' - needs_subscription = False - remove_tags = [dict(attrs={'class': ['borderbottom', 'web', 'foot', 'reklama', 'd-elm d-rellinks', 'd-elm']}), - dict(style=['text-align: center;']), - dict(id=['r-bfull']), - dict(name=['script', 'noscript', 'style'])] - encoding = 'windows-1250' - no_stylesheets = True - remove_tags_before = dict(attrs={'class': 'd-nadtit'}) - remove_tags_after = dict(attrs={'class': 'like'}) - - conversion_options = { - 'linearize_tables': True, - } - - def preprocess_html(self, soup): - - def makeurl(wat): - return "http://ihned.cz" + wat - - for h1 in soup.findAll('h1'): - a = h1.find('a') - if a: - string = a.string - if string: - soup.a.replaceWith(string) - for a in soup.findAll('a', href=True): - cil = str(a['href']) - if cil.startswith("/") or cil.startswith("index"): - a['href'] = makeurl(cil) - return soup - - def parse_index(self): - - def makeurl(wat): - if wat.startswith("/") or wat.startswith("index"): - return "http://ihned.cz" + wat - else: - return wat - - articles = {} # vysledek, asi - ans = [] # vsechny sekce - - articles["Hlavní"] = [] - ans.append("Hlavní") - - was = {} - - def parse_subpage(url, name): - articles[name] = [] - ans.append(name) - - soup = self.index_to_soup(url) - otvirak = soup.find(True, attrs={'class': ['otv']}) - if otvirak: - - # the code is copypasted here because I don't know python. - # simple as that. - a = otvirak.find('a', href=True) - title = self.tag_to_string(a, use_alt=True).strip() - txt = otvirak.find(True, attrs={'class': ['txt']}) - description = '' - if txt: - match = re.match( - r'

\s*([^<]*)\s*\s*([^<]*)\s*\s*([^<]*)\s*Souvisej.*', re.DOTALL | re.IGNORECASE), - lambda match: ''), - ] - - def print_version(self, url): - m = re.search('(?<=ID=)[0-9]*', url) - - return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk' - - extra_css = ''' - h1 {font-size:140%;font-family:georgia,serif; font-weight:bold} - h3 {font-size:115%;font-family:georgia,serif; font-weight:bold} - ''' diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index 23ec834516..e7b7c1b73c 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -103,7 +103,7 @@ class IndiaToday(BasicNewsRecipe): if 'description_short' in data: desc = '

' + data['description_short'] + '

\n' if data.get('author'): - author = ''.join([names['title'] for names in data['author']]) + author = ', '.join([names['title'] for names in data['author']]) if 'city' in data: city = data['city'] if 'datetime_updated' in data: diff --git a/recipes/indy_star.recipe b/recipes/indy_star.recipe deleted file mode 100644 index ebbe8d2465..0000000000 --- a/recipes/indy_star.recipe +++ /dev/null @@ -1,26 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class IndianapolisStar(BasicNewsRecipe): - title = u'Indianapolis Star' - oldest_article = 10 - auto_cleanup = True - language = 'en' - __author__ = 'Owen Kelly' - max_articles_per_feed = 100 - cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg' - feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss'), - (u'News Headlines', - u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss'), - (u'Business Headlines', - u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss'), - (u'Politics and Government', - u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS05&template=rss'), - (u'Lifestyle Headlines', - u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), - (u'Opinion Headlines', - u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML') - ] - - def print_version(self, url): - return url + '&template=printart' diff --git a/recipes/infomotori.recipe b/recipes/infomotori.recipe deleted file mode 100644 index 5de516a244..0000000000 --- a/recipes/infomotori.recipe +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Gabriele Marini, based on Darko Miletic' -__copyright__ = '2009-2010, Darko Miletic ' -description = 'On Line Motor News - 01-05-2010' - -''' -http://www.infomotori.it/ -''' -from calibre.ebooks.BeautifulSoup import BeautifulSoup -from calibre.web.feeds.news import BasicNewsRecipe - - -class infomotori(BasicNewsRecipe): - author = 'Gabriele Marini' - title = u'Infomotori' - cover = 'http://www.infomotori.com/content/files/anniversario_01.gif' - oldest_article = 31 - max_articles_per_feed = 100 - recursion = 100 - use_embedded_content = False - - language = 'it' - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - language = 'it' - timefmt = '[%a, %d %b, %Y]' - - def print_version(self, url): - raw = self.browser.open(url).read() - soup = BeautifulSoup(raw.decode('utf8', 'replace')) - print_link = soup.find('a', {'class': 'printarticle'}) - - return print_link['href'] - - feeds = [(u'Ultime Novit\xe0', u'http://feeds.infomotori.com/ultimenovita'), - (u'Auto: Ultime Novit\xe0 ', - u'http://feeds.infomotori.com/autonovita'), - (u'Moto: Ultime Novit\xe0 Moto', - u'http://feeds.infomotori.com/motonovita'), - (u'Notizie Flash', - u'http://feeds.infomotori.com/infoflashmotori'), - (u'Veicoli Ecologici e Mobilit\xe0 Sostenibile', - u'http://feeds.infomotori.com/ecomotori'), - (u'4x4 Fuoristrada, Crossover e Suv', - u'http://feeds.infomotori.com/fuoristrada'), - (u'Shopping Motori', - u'http://feeds.infomotori.com/shoppingmotori') - ] diff --git a/recipes/infoworld.recipe b/recipes/infoworld.recipe deleted file mode 100644 index 3873576490..0000000000 --- a/recipes/infoworld.recipe +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Rick Kellogg' -''' -Infoworld.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Engadget(BasicNewsRecipe): - title = u'Infoworld.com' - __author__ = 'Rick Kellogg' - description = 'news' - language = 'en' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - auto_cleanup = True - - feeds = [(u'Top Tech Stories', u'http://www.infoworld.com/index.rss'), - (u'Today\'s Tech Headlines', - u'http://www.infoworld.com/news/index.rss')] diff --git a/recipes/infra_pl.recipe b/recipes/infra_pl.recipe deleted file mode 100644 index e705539191..0000000000 --- a/recipes/infra_pl.recipe +++ /dev/null @@ -1,27 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class INFRA(BasicNewsRecipe): - title = u'INFRA' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'fenuks' - description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.' - cover_url = 'http://i.imgur.com/j7hJT.jpg' - category = 'UFO' - index = 'http://infra.org.pl' - language = 'pl' - max_articles_per_feed = 100 - remove_empty_feeds = True - remove_attrs = ['style'] - no_stylesheets = True - keep_only_tags = [dict(id='ja-current-content')] - feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for a in soup.findAll('a', href=True): - if 'http://' not in a['href'] and 'https://' not in a['href']: - a['href'] = self.index + a['href'] - return soup diff --git a/recipes/inopressa.recipe b/recipes/inopressa.recipe deleted file mode 100644 index 6cee3e18c2..0000000000 --- a/recipes/inopressa.recipe +++ /dev/null @@ -1,25 +0,0 @@ -# vim:fileencoding=UTF-8 -from __future__ import unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Dingoo(BasicNewsRecipe): - language = 'ru' - __author__ = 'bug_me_not' - title = u'\u0418\u043d\u043e\u041f\u0440\u0435\u0441\u0441\u0430' - description = 'Иностранная пресса о России и не только' - cover_url = u'http://static.inopressa.ru/img/inopressa-logo-light.gif' - language = 'ru' - oldest_article = 14 - max_articles_per_feed = 200 - - no_stylesheets = False - remove_javascript = True - auto_cleanup = True - - feeds = [(u'\u043e \u0420\u043e\u0441\u0441\u0438\u0438 \u0438 \u043d\u0435 \u0442\u043e\u043b\u044c\u043a\u043e', - u'http://pda.inopressa.ru/rss')] - - remove_tags_before = dict(name='div', attrs={'class': 'article'}) - remove_tags_after = dict(name='div', attrs={'class': 'source'}) diff --git a/recipes/insider.recipe b/recipes/insider.recipe deleted file mode 100644 index 7a53daca3c..0000000000 --- a/recipes/insider.recipe +++ /dev/null @@ -1,59 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import unicode_literals - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class insider(BasicNewsRecipe): - __author__ = 'bubak' - title = 'Insider' - language = 'cs' - - remove_tags = [dict(name='div', attrs={'class': 'article-related-content'}), dict(name='div', attrs={'class': 'calendar'}), dict(name='span', attrs={'id': 'labelHolder'}) # noqa - ] - - no_stylesheets = True - keep_only_tags = [ - dict(name='div', attrs={'class': ['doubleBlock textContentFormat']})] - - preprocess_regexps = [ - (re.compile(r'T.mata:.*', re.DOTALL | re.IGNORECASE), lambda m: '')] - needs_subscription = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.denikinsider.cz/') - br.select_form(nr=0) - br['login-name'] = self.username - br['login-password'] = self.password - res = br.submit() - raw = res.read() - if u'Odhlásit se'.encode('utf-8') not in raw: - raise ValueError('Failed to login to insider.cz' - 'Check your username and password.') - return br - - def parse_index(self): - articles = [] - - soup = self.index_to_soup('http://www.denikinsider.cz') - titles = soup.findAll('span', attrs={'class': 'homepageArticleTitle'}) - if titles is None: - raise ValueError('Could not find category content') - - articles = [] - seen_titles = set() - for title in titles: - if title.string in seen_titles: - continue - article = title.parent - seen_titles.add(title.string) - url = article['href'] - if url.startswith('/'): - url = 'http://www.denikinsider.cz/' + url - self.log('\tFound article:', title, 'at', url) - articles.append({'title': title.string, 'url': url, 'description': '', - 'date': ''}) - return [(self.title, articles)] diff --git a/recipes/io9.recipe b/recipes/io9.recipe deleted file mode 100644 index 19801d0fbc..0000000000 --- a/recipes/io9.recipe +++ /dev/null @@ -1,36 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, NA' -''' -io9.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class i09(BasicNewsRecipe): - title = 'io9' - __author__ = 'NA' - description = "io9, We Come From the Future." - publisher = 'io9.com' - category = 'news, science, education' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - auto_cleanup = True - language = 'en' - masthead_url = 'http://cache.gawkerassets.com/assets/io9.com/img/logo.png' - extra_css = ''' - body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} - img{margin-bottom: 1em} - h1{font-family :Arial,Helvetica,sans-serif; font-size:large} - ''' - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [(u'Articles', u'http://feeds.gawker.com/io9/vip?format=xml')] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/ionline_pt.recipe b/recipes/ionline_pt.recipe deleted file mode 100644 index 4287f52e3e..0000000000 --- a/recipes/ionline_pt.recipe +++ /dev/null @@ -1,53 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -www.ionline.pt -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class IOnline_pt(BasicNewsRecipe): - title = 'ionline - Portugal' - __author__ = 'Darko Miletic' - description = 'News from Portugal' - publisher = 'ionline.pt' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'pt' - extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} ' # noqa - - conversion_options = { - 'comment': description, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(name=['h5', 'h1']), dict(name='div', attrs={ - 'class': ['publish', 'overview', 'entity']}) - ] - - remove_tags = [ - dict(name=['object', 'embed', 'iframe']) - ] - - feeds = [ - - (u'Portugal', u'http://www.ionline.pt/rss/portugal.xml'), - (u'Mundo', u'http://www.ionline.pt/rss/mundo.xml'), - (u'Dinheiro', u'http://www.ionline.pt/rss/dinheiro.xml'), - (u'Desporto', u'http://www.ionline.pt/rss/desporto.xml'), - (u'Boa Vida', u'http://www.ionline.pt/rss/boavida.xml'), - (u'iReporter', u'http://www.ionline.pt/rss/ireporter.xml'), - (u'iBloges', u'http://www.ionline.pt/rss/iblogues.xml'), - (u'Desporto', u'http://www.ionline.pt/rss/desporto.xml') - ] - - def print_version(self, url): - rest = url.rpartition('/')[2] - lmain = rest.partition('-')[0] - lurl = u'http://www.ionline.pt/interior/index.php?p=news-print&idNota=' + lmain - return lurl diff --git a/recipes/ivanamilakovic.recipe b/recipes/ivanamilakovic.recipe deleted file mode 100644 index 03c2cf7ac0..0000000000 --- a/recipes/ivanamilakovic.recipe +++ /dev/null @@ -1,42 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2012, Darko Miletic ' -''' -ivanamilakovic.blogspot.com -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class IvanaMilakovic(BasicNewsRecipe): - title = u'Ivana Milaković' - __author__ = 'Darko Miletic' - description = u'Hronika mačijeg škrabala - priče, inspiracija, knjige, pisanje, prevodi...' - oldest_article = 80 - max_articles_per_feed = 100 - language = 'sr' - encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = True - publication_type = 'blog' - extra_css = """ - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif} - img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } - """ - - conversion_options = { - 'comment': description, 'tags': 'knjige, blog, srbija, sf', 'publisher': 'Ivana Milakovic', 'language': language - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - feeds = [(u'Posts', u'http://ivanamilakovic.blogspot.com/feeds/posts/default')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe index 72f2985313..4a0374ec70 100644 --- a/recipes/scientific_american.recipe +++ b/recipes/scientific_american.recipe @@ -26,10 +26,9 @@ class ScientificAmerican(BasicNewsRecipe): extra_css = """ [class^="article_dek-"] { font-style:italic; color:#202020; } [class^="article_authors-"] {font-size:small; color:#202020; } - [class^="article__image-"] { font-size:small; text-align:center; } - [class^="lead_image-"] { font-size:small; text-align:center; } + [class^="article__image-"], [class^="lead_image-"], .calibre-nuked-tag-figcaption { font-size:small; } [class^="bio-"] { font-size:small; color:#404040; } - em { color:#202020; } + em, blockquote { color:#202020; } """ needs_subscription = "optional" @@ -44,9 +43,11 @@ class ScientificAmerican(BasicNewsRecipe): ] def preprocess_html(self, soup): + for h2 in soup.findAll(['h2', 'h3']): + h2.name = 'h4' for fig in soup.findAll('figcaption'): - for p in fig.findAll(['p', 'div']): - p.name = 'span' + for p in fig.findAll('p'): + p.name = 'div' res = '?w=600' w = self.recipe_specific_options.get('res') if w and isinstance(w, str):