From 8f7e2faa89792094d539ac87d970bda6758d68ce Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:47:36 +0530 Subject: [PATCH 1/2] Update swarajya.recipe --- recipes/swarajya.recipe | 86 ++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/recipes/swarajya.recipe b/recipes/swarajya.recipe index 76fd237026..48660cecb2 100644 --- a/recipes/swarajya.recipe +++ b/recipes/swarajya.recipe @@ -1,51 +1,91 @@ -from calibre.web.feeds.news import BasicNewsRecipe, classes +#!/usr/bin/env python +import re +import json + +from calibre.web.feeds.news import BasicNewsRecipe + + +def absurl(url): + if url.startswith('/'): + return 'https://swarajyamag.com' + url + return url + + +html_entities = {'"': '"', ''': "'", '<': '<', '>': '>', '&': '&'} class SwarajyaMag(BasicNewsRecipe): - title = u'Swarajya Magazine' + title = 'Swarajya Magazine' __author__ = 'unkn0wn' description = 'Swarajya - a big tent for liberal right of centre discourse that reaches out, engages and caters to the new India.' language = 'en_IN' - no_stylesheets = True remove_javascript = True use_embedded_content = False - remove_attributes = ['height', 'width', 'style'] encoding = 'utf-8' - keep_only_tags = [ - dict(name='article') - ] - - remove_tags = [ - dict(name=['svg', 'button', 'source']), - classes('swarajya_patron_block hs-tooltip-content hidden'), - ] + recipe_specific_options = { + 'issue': { + 'short': 'The edition URL ', + } + } def preprocess_html(self, soup): - for span in soup.findAll('span'): - if self.tag_to_string(span).strip() == 'Tags': - div = span.findParent('div') - if div: - div.extract() + for img in soup.findAll('img', attrs={'src': True}): + img['src'] = img['src'].split('?')[0] + '?w=600' return soup def parse_index(self): - soup = self.index_to_soup('https://swarajyamag.com/all-issues') - a = soup.find('a', href=lambda x: x and x.startswith('https://swarajyamag.com/issue/')) - url = a['href'] + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + url = d + else: + soup = self.index_to_soup('https://swarajyamag.com/all-issues') + a = soup.find('a', href=lambda x: x and x.startswith('/issue/')) + url = absurl(a['href']) self.log('Downloading issue:', url) - self.cover_url = a.img['src'] + soup = self.index_to_soup(url) ans = [] - for div in soup.findAll('div', attrs={'class':'rounded'}): + cont = soup.find(attrs={'id': 'container'}) + self.cover_url = ( + cont.find('a', href=lambda x: x and x.startswith('/issue/')) + .img['src'] + .split('?')[0] + + '?w=600' + ) + for div in cont.findAll('div', attrs={'class': 'rounded'}): url = div.findParent('a')['href'] if url.startswith('/'): url = 'https://swarajyamag.com' + url h4 = div.find('h4') title = self.tag_to_string(h4) - d = h4.next_sibling + d = h4.next_sibling.div desc = 'By ' + self.tag_to_string(d).strip() self.log(title, ' at ', url, '\n', desc) ans.append({'title': title, 'url': url, 'description': desc}) return [('Articles', ans)] + + def preprocess_raw_html(self, raw, url): + app = re.search( + r'', re.IGNORECASE | re.DOTALL), ''), - ] - - def preprocess_html(self, soup): - # Remove whole article if it's a "zapnet" (video) - if soup.find('h1', {'class': 'zapnet_title'}): - return None - # Reduce h2 titles to h3 - for title in soup.findAll('h2'): - title.name = 'h3' - return soup diff --git a/recipes/rusiahoy.recipe b/recipes/rusiahoy.recipe deleted file mode 100644 index 113afc5308..0000000000 --- a/recipes/rusiahoy.recipe +++ /dev/null @@ -1,43 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -rusiahoy.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RusiaHoy(BasicNewsRecipe): - title = 'Rusia Hoy' - __author__ = 'Darko Miletic' - description = 'Noticias de Russia en castellano' - publisher = 'rusiahoy.com' - category = 'news, politics, Russia' - oldest_article = 7 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'es' - remove_empty_feeds = True - extra_css = """ - body{font-family: Arial,sans-serif } - .article_article_title{font-size: xx-large; font-weight: bold} - .article_date{color: black; font-size: small} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link', 'iframe', 'base', 'object', 'embed'])] - keep_only_tags = [dict(attrs={'class': ['article_rubric_title', 'article_date', 'article_article_title', 'article_article_lead']}), dict(attrs={'class': 'article_article_text'}) ] # noqa - remove_attributes = ['align', 'width', 'height'] - - feeds = [(u'Articulos', u'http://rusiahoy.com/xml/index.xml')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/rynek_infrastruktury.recipe b/recipes/rynek_infrastruktury.recipe deleted file mode 100644 index 6c981640b6..0000000000 --- a/recipes/rynek_infrastruktury.recipe +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = 'teepel ' - -''' -http://www.rynekinfrastruktury.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class prawica_recipe(BasicNewsRecipe): - title = u'Rynek Infrastruktury' - __author__ = 'teepel ' - language = 'pl' - description = u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.' # noqa - remove_empty_feeds = True - oldest_article = 1 - max_articles_per_feed = 50 - remove_javascript = True - no_stylesheets = True - - feeds = [ - (u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/drogi.xml'), - (u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/kolej.xml'), - (u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/energetyka.xml') - # no news in these feeds since 4 years: - # (u'Porty i lotniska', u'http://www.rynekinfrastruktury.pl/rss/porty-i-lotniska.xml'), - # (u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/komentarze-i-felietony.xml'), - ] - - keep_only_tags = [ - dict(name='h1', attrs={'class': 'wiadTit'}), - dict(name='div', attrs={'class': ['wiadSzczegol', 'multimediaWiadomosci', 'wiadTresc']}) - ] - - remove_tags = [dict(name='span', attrs={'class': 'kom'})] diff --git a/recipes/rynek_zdrowia.recipe b/recipes/rynek_zdrowia.recipe deleted file mode 100644 index fff9041c6e..0000000000 --- a/recipes/rynek_zdrowia.recipe +++ /dev/null @@ -1,35 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class rynekzdrowia(BasicNewsRecipe): - title = u'Rynek Zdrowia' - __author__ = u'spi630' - language = 'pl' - masthead_url = 'http://k.rynekzdrowia.pl/images/headerLogo.png' - cover_url = 'http://k.rynekzdrowia.pl/images/headerLogo.png' - oldest_article = 3 - max_articles_per_feed = 25 - no_stylesheets = True - auto_cleanup = True - remove_empty_feeds = True - - remove_tags_before = dict(name='h3') - - feeds = [ - (u'Finanse i Zarz\u0105dzanie', u'http://www.rynekzdrowia.pl/Kanal/finanse.html'), - (u'Inwestycje', u'http://www.rynekzdrowia.pl/Kanal/inwestycje.html'), - (u'Aparatura i wyposa\u017cenie', u'http://www.rynekzdrowia.pl/Kanal/aparatura.html'), - (u'Informatyka', u'http://www.rynekzdrowia.pl/Kanal/informatyka.html'), - (u'Prawo', u'http://www.rynekzdrowia.pl/Kanal/prawo.html'), - (u'Polityka zdrowotna', u'http://www.rynekzdrowia.pl/Kanal/polityka_zdrowotna.html'), - - (u'Ubezpieczenia Zdrowotne', u'http://www.rynekzdrowia.pl/Kanal/ubezpieczenia.html'), - (u'Farmacja', u'http://www.rynekzdrowia.pl/Kanal/farmacja.html'), - (u'Badania i rozw\xf3j', u'http://www.rynekzdrowia.pl/Kanal/badania.html'), - (u'Nauka', u'http://www.rynekzdrowia.pl/Kanal/nauka.html'), - (u'Po godzinach', u'http://www.rynekzdrowia.pl/Kanal/godziny.html'), - (u'Us\u0142ugi medyczne', u'http://www.rynekzdrowia.pl/Kanal/uslugi.html')] - - def print_version(self, url): - url = url.replace('.html', ',drukuj.html') - return url diff --git a/recipes/sa_gazeta.recipe b/recipes/sa_gazeta.recipe deleted file mode 100644 index aa465b530a..0000000000 --- a/recipes/sa_gazeta.recipe +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import AutomaticNewsRecipe - - -class BasicUserRecipe1501589847(AutomaticNewsRecipe): - title = 'Sa gazeta' - oldest_article = 30 - max_articles_per_feed = 100 - auto_cleanup = True - language = 'sc' - __author__ = 'tzium' - - feeds = [ - ('Sa gazeta', 'http://www.sagazeta.info/feeds/posts/default'), - ] diff --git a/recipes/sabit_fikir.recipe b/recipes/sabit_fikir.recipe deleted file mode 100644 index a42b6ed393..0000000000 --- a/recipes/sabit_fikir.recipe +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1325259641(BasicNewsRecipe): - language = 'tr' - __author__ = 'asalet_r' - title = u'Sabit Fikir' - oldest_article = 7 - max_articles_per_feed = 20 - auto_cleanup = True - - feeds = [(u'Sabit Fikir', u'http://www.sabitfikir.com/rss.xml')] diff --git a/recipes/sage_news.recipe b/recipes/sage_news.recipe deleted file mode 100644 index b65ca524c0..0000000000 --- a/recipes/sage_news.recipe +++ /dev/null @@ -1,33 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1292550626(BasicNewsRecipe): - title = 'The Sage News - Satire' - __author__ = 'Brian Hahn' - description = 'News without boundaries, Satire' - oldest_article = 200 - max_articles_per_feed = 150 - no_stylesheets = True - use_embedded_content = False - publisher = 'The Sage News Network' - category = 'News, Alberta, Canada' - language = 'en_CA' - encoding = 'iso-8859-1' - cover_url = 'http://www.sagenews.ca/images/satire-cover.jpg' - remove_tags_before = dict(id='ContentPanel') - remove_tags_after = dict(id='ContentPanel') - remove_tags = [dict(name='div', attrs={'id': 'BottomAds'}), dict(name='div', attrs={ - 'id': 'moreStories'}), dict(name='div', attrs={'id': 'StoryNavigation'})] - extra_css = 'img { margin:5px }' - feeds = [ - ('Satire World', 'http://www.sagenews.ca/Satire-World.rss'), - ('Satire Politics', 'http://www.sagenews.ca/Satire-Politics.rss'), - ('Satire Justice', 'http://www.sagenews.ca/Satire-Justice.rss'), - ('Satire Health', 'http://www.sagenews.ca/Satire-Health.rss'), - ('Satire Environment', 'http://www.sagenews.ca/Satire-Environment.rss'), - ('Satire Living', 'http://www.sagenews.ca/Satire-Living.rss'), - ('Satire Sports', 'http://www.sagenews.ca/Satire-Sports.rss'), - ('Satire Business', 'http://www.sagenews.ca/Satire-Business.rss'), - ('Satire Agriculture', 'http://www.sagenews.ca/Satire-Agriculture.rss'), - ('Satire Humour', 'http://www.sagenews.ca/Satire-Humour.rss'), - ] diff --git a/recipes/sage_news_opinion.recipe b/recipes/sage_news_opinion.recipe deleted file mode 100644 index 141837b12a..0000000000 --- a/recipes/sage_news_opinion.recipe +++ /dev/null @@ -1,37 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1292550626(BasicNewsRecipe): - title = 'The Sage News - Opinion' - __author__ = 'Brian Hahn' - description = 'News without boundaries, Opinion' - oldest_article = 200 - max_articles_per_feed = 150 - no_stylesheets = True - use_embedded_content = False - publisher = 'The Sage News Network' - category = 'News, Alberta, Canada' - language = 'en_CA' - encoding = 'iso-8859-1' - cover_url = 'http://www.sagenews.ca/images/opinion-cover.jpg' - remove_tags_before = dict(id='ContentPanel') - remove_tags_after = dict(id='ContentPanel') - remove_tags = [dict(name='div', attrs={'id': 'BottomAds'}), dict(name='div', attrs={ - 'id': 'moreStories'}), dict(name='div', attrs={'id': 'StoryNavigation'})] - extra_css = 'img { margin:5px }' - feeds = [ - ('Editorial Comment', 'http://www.sagenews.ca/Editorial%20Comment.rss'), - ('Grumpy Old Man', 'http://www.sagenews.ca/Grumpy%20Old%20Man.rss'), - ('Bad Girl', 'http://www.sagenews.ca/Bad%20Girl.rss'), - ('Around the Edges with Dixie', - 'http://www.sagenews.ca/Around%20the%20Edges%20with%20Dixie.rss'), - ('Man Vs. World', 'http://www.sagenews.ca/Man%20Vs.%20World.rss'), - ('Opinion World', 'http://www.sagenews.ca/Opinion-World.rss'), - ('Opinion Politics', 'http://www.sagenews.ca/Opinion-Politics.rss'), - ('Opinion Justice', 'http://www.sagenews.ca/Opinion-Justice.rss'), - ('Opinion Health', 'http://www.sagenews.ca/Opinion-Health.rss'), - ('Opinion Environment', 'http://www.sagenews.ca/Opinion-Environment.rss'), - ('Opinion Living', 'http://www.sagenews.ca/Opinion-Living.rss'), - ('Opinion Sports', 'http://www.sagenews.ca/Opinion-Sports.rss'), - ('Opinion Business', 'http://www.sagenews.ca/Opinion-Business.rss'), - ] diff --git a/recipes/salonica_press_news.recipe b/recipes/salonica_press_news.recipe deleted file mode 100644 index 30864d9249..0000000000 --- a/recipes/salonica_press_news.recipe +++ /dev/null @@ -1,35 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class spn(BasicNewsRecipe): - title = u'Salonica Press News' - language = 'gr' - __author__ = "SteliosGero" - oldest_article = 3 - max_articles_per_feed = 100 - auto_cleanup = True - category = 'news, GR' - language = 'el' - - feeds = [ - (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&type=rss'), - (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&type=rss'), - (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&type=rss'), - (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&type=rss'), - (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&type=rss'), - (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&type=rss'), - (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&type=rss'), - (u'Media', u'http://www.spnews.gr/media-news?format=feed&type=rss'), - (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&type=rss'), - - (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&type=rss'), - (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&type=rss'), - (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&type=rss'), - (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&type=rss'), - (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&type=rss'), - (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&type=rss'), - (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&type=rss'), - (u'Alert!', u'http://spnews.gr/alert?format=feed&type=rss')] - - def print_version(self, url): - return url + '?tmpl=component&print=1&layout=default&page=' diff --git a/recipes/samanyolu_haber.recipe b/recipes/samanyolu_haber.recipe deleted file mode 100644 index 14143caac8..0000000000 --- a/recipes/samanyolu_haber.recipe +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SHaber (BasicNewsRecipe): - - title = u'Samanyolu Haber' - __author__ = u'thomass' - description = ' Samanyolu Haber Sitesinden günlük haberler ' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - auto_cleanup = True - encoding = 'utf-8' - publisher = 'thomass' - category = 'güncel, haber, türkçe' - language = 'tr' - publication_type = 'newspaper' - - conversion_options = { - 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - extra_css = ' .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px} .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px } #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }.KirmiziText{ font-weight: normal;font-size: 5px }' # noqa - - cover_img_url = 'http://www.samanyoluhaber.com/include/logo.png' - masthead_url = 'http://www.samanyoluhaber.com/include/logo.png' - remove_empty_feeds = True - - feeds = [ - (u'Son Dakika', u'http://podcast.samanyoluhaber.com/sondakika.rss'), - (u'Gündem', u'http://podcast.samanyoluhaber.com/gundem.rss'), - (u'Politika ', u'http://podcast.samanyoluhaber.com/politika.rss'), - (u'Ekonomi', u'http://podcast.samanyoluhaber.com/ekonomi.rss'), - (u'Dünya', u'http://podcast.samanyoluhaber.com/dunya.rss'), - (u'Spor ', u'http://podcast.samanyoluhaber.com/spor.rss'), - (u'Sağlık', u'http://podcast.samanyoluhaber.com/saglik.rss'), - (u'Kültür', u'http://podcast.samanyoluhaber.com/kultur.rss'), - (u'Eğitim', u'http://podcast.samanyoluhaber.com/egitim.rss'), - (u'Ramazan', u'http://podcast.samanyoluhaber.com/ramazan.rss'), - (u'Yazarlar ', u'http://podcast.samanyoluhaber.com/yazarlar.rss'), - - - - ] - - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup - diff --git a/recipes/samanyolu_teknoloji.recipe b/recipes/samanyolu_teknoloji.recipe deleted file mode 100644 index d05dfaf9a6..0000000000 --- a/recipes/samanyolu_teknoloji.recipe +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SHaberTekno (BasicNewsRecipe): - - title = u'Samanyolu Teknoloji' - __author__ = u'thomass' - description = 'Samanyolu Teknoloji Haber Sitesinden haberler ' - oldest_article = 8 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - publisher = 'thomass' - category = 'bilim, teknoloji, haber, türkçe' - language = 'tr' - publication_type = 'magazine' - - conversion_options = { - 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - extra_css = ' .IcerikMetin{ font-family:sans-serif;font-weight: normal;font-size: 10px } .h1IcerikBaslik {font-weight: bold; font-size: 18px}' - - keep_only_tags = [ - dict(name='div', attrs={'class': ['IcerikBaslik', 'IcerikMetinDiv']})] - - cover_img_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg' - masthead_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg' - remove_empty_feeds = True - - feeds = [ - (u'GENEL', u'http://podcast.samanyoluhaber.com/Teknoloji.rss'), - (u'İNTERNET', u'http://open.dapper.net/services/shaberteknolojiinternet'), - (u'CEP TELEFONU', - u'http://open.dapper.net/services/shaberteknolojicep'), - (u'OYUN', u'http://open.dapper.net/services/shaberteknolojioyun'), - (u'DONANIM', u'http://open.dapper.net/services/httpopendappernetservicesshaberteknolojidonanim'), - (u'ÜRÜN İNCELEME', - u'http://open.dapper.net/services/shaberteknolojiurun'), - (u'ALIŞVERİŞ', u'http://open.dapper.net/services/shaberteknolojialisveris'), - (u'BİLİM & TEKNOLOJİ', - u'http://open.dapper.net/services/shaberteknolojibilim'), - (u'HABERLER', u'http://open.dapper.net/services/shaberteknolojihaber'), - - - - ] - diff --git a/recipes/sarajevo_x.recipe b/recipes/sarajevo_x.recipe deleted file mode 100644 index c5a97b369c..0000000000 --- a/recipes/sarajevo_x.recipe +++ /dev/null @@ -1,73 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' - -''' -sarajevo-x.com -''' - -import re - -from calibre.ebooks.BeautifulSoup import NavigableString, Tag -from calibre.web.feeds.recipes import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class SarajevoX(BasicNewsRecipe): - title = 'Sarajevo-x.com' - __author__ = 'Darko Miletic' - description = 'Sarajevo-x.com - najposjeceniji bosanskohercegovacki internet portal' - publisher = 'InterSoft d.o.o.' - category = 'news, politics, Bosnia and Herzegovina,Sarajevo-x.com, internet, portal, vijesti, bosna i hercegovina, sarajevo' - oldest_article = 2 - delay = 1 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'cp1250' - use_embedded_content = False - language = 'bs' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} div#fotka{display: block} img{margin-bottom: 0.5em} ' # noqa - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - keep_only_tags = [dict(name='div', attrs={'class': 'content-bg'})] - remove_tags_after = dict(name='div', attrs={'class': 'izvor'}) - remove_tags = [dict(name=['object', 'link', 'base', 'table'])] - remove_attributes = ['height', 'width', 'alt', 'border'] - - feeds = [ - - (u'BIH', u'http://www.sarajevo-x.com/rss/bih'), - (u'Svijet', u'http://www.sarajevo-x.com/rss/svijet'), - (u'Biznis', u'http://www.sarajevo-x.com/rss/biznis'), - (u'Sport', u'http://www.sarajevo-x.com/rss/sport'), - (u'Showtime', u'http://www.sarajevo-x.com/rss/showtime'), - (u'Scitech', u'http://www.sarajevo-x.com/rss/scitech'), - (u'Lifestyle', u'http://www.sarajevo-x.com/rss/lifestyle'), - (u'Kultura', u'http://www.sarajevo-x.com/rss/kultura'), - (u'Zanimljivosti', u'http://www.sarajevo-x.com/rss/zanimljivosti') - ] - - def preprocess_html(self, soup): - dtag = soup.find('div', attrs={'id': 'fotka'}) - if dtag: - sp = soup.find('div', attrs={'id': 'opisslike'}) - img = soup.find('img') - if sp: - sp - else: - mtag = new_tag(soup, 'div', [ - ("id", "opisslike"), ("class", "opscitech")]) - mopis = NavigableString("Opis") - mtag.insert(0, mopis) - img.append(mtag) - return soup diff --git a/recipes/sardinia_post.recipe b/recipes/sardinia_post.recipe deleted file mode 100644 index 3a4b902f6c..0000000000 --- a/recipes/sardinia_post.recipe +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import AutomaticNewsRecipe - - -class BasicUserRecipe1501589429(AutomaticNewsRecipe): - title = 'Sardinia Post (Lapis)' - oldest_article = 30 - max_articles_per_feed = 100 - auto_cleanup = True - language = 'sc' - __author__ = 'tzium' - - feeds = [ - ('Sardinia Post (Lapis)', - 'https://www.sardiniapost.it/category/lapis/feed'), - ] diff --git a/recipes/satira.recipe b/recipes/satira.recipe deleted file mode 100644 index 6d130853e7..0000000000 --- a/recipes/satira.recipe +++ /dev/null @@ -1,18 +0,0 @@ -__license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1327351409(BasicNewsRecipe): - title = u'Satira' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - feeds = [ - (u'spinoza', u'http://feeds.feedburner.com/Spinoza'), - (u'umore maligno', u'http://www.umoremaligno.it/feed/rss/'), - (u'fed-ex', u'http://exfed.tumblr.com/rss'), - (u'metilparaben', u'http://feeds.feedburner.com/metil'), - (u'freddy nietzsche', u'http://feeds.feedburner.com/FreddyNietzsche')] - __author__ = 'faber1971' - description = 'Collection of Italian satiric blogs - v1.00 (28, January 2012)' - language = 'it' diff --git a/recipes/sb_nation.recipe b/recipes/sb_nation.recipe deleted file mode 100644 index 00b33ecefa..0000000000 --- a/recipes/sb_nation.recipe +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = 'Zotzo' -''' -http://www.stumptownfooty.com/ -http://www.eightysixforever.com -http://www.sounderatheart.com -http://www.dailysoccerfix.com/ - -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class SBNation(BasicNewsRecipe): - title = u'SBNation' - __author__ = 'rylsfan' - description = u"More than 290 individual communities, each offering high quality year-round coverage and conversation led by fans who are passionate." - oldest_article = 3 - language = 'en' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - - keep_only_tags = [ - dict(name='h2', attrs={'class': 'title'}), dict( - name='div', attrs={'class': 'entry-body'}) - ] - - remove_tags_after = dict( - name='div', attrs={'class': 'footline entry-actions'}) - remove_tags = [ - dict(name='div', attrs={'class': 'footline entry-actions'}), - {'class': 'extend-divide'} - ] - # SBNation has 300 special blogs to choose from. These are just a couple! - feeds = [ - (u'Daily Fix', u'http://www.dailysoccerfix.com/rss/'), - (u"Stumptown Footy", u'http://www.stumptownfooty.com/rss/'), - (u'Sounders', u'http://www.sounderatheart.com/rss/'), - (u'Whitecaps', u'http://www.eightysixforever.com/rss/'), - ] - - extra_css = """ - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - p{font-family:Helvetica,sans-serif; display: block; text-align: left; text-decoration: none; text-indent: 0%;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - """ - - def preprocess_html(self, soup): - return self.adeify_images(soup) - - def populate_article_metadata(self, article, soup, first): - h2 = soup.find('h2') - h2.replaceWith(h2.prettify() + '

By ' + - article.author + '

') diff --git a/recipes/schattenblick.recipe b/recipes/schattenblick.recipe deleted file mode 100644 index dd0b5dedb1..0000000000 --- a/recipes/schattenblick.recipe +++ /dev/null @@ -1,14 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1345802300(BasicNewsRecipe): - title = u'Online-Zeitung Schattenblick' - language = 'de' - __author__ = 'ThB' - publisher = u'MA-Verlag' - category = u'Nachrichten' - oldest_article = 7 - max_articles_per_feed = 100 - cover_url = 'http://www.schattenblick.de/mobi/rss/cover.jpg' - feeds = [(u'Schattenblick Tagesausgabe', - u'http://www.schattenblick.de/mobi/rss/rss.xml')] diff --git a/recipes/schwarzerpfeil.recipe b/recipes/schwarzerpfeil.recipe deleted file mode 100644 index aec7c85bd2..0000000000 --- a/recipes/schwarzerpfeil.recipe +++ /dev/null @@ -1,30 +0,0 @@ -# -*- mode: python; -*- -__license__ = 'GPL v3' - -''' -SchwarzerPfeil Calibre recipe. -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class Schwarzerpfeil(BasicNewsRecipe): - title = 'SchwarzerPfeil' - __author__ = 'tastytea' - description = 'Das partizipative Mag von und für die antiautoritäre Bewegung' - publication_type = 'magazine' - language = 'de' - timefmt = ' [%d. %B %Y]' - max_articles_per_feed = 100 - oldest_article = 30 - use_embedded_content = True - no_stylesheets = True - auto_cleanup = False - - feeds = [ - ('Artikel', 'https://schwarzerpfeil.de/feed/'), - ('Kommentare', 'https://schwarzerpfeil.de/comments/feed/') - ] - - def get_cover_url(self): - return "https://schwarzerpfeil.de/wp-content/uploads/2020/09/12-1.png" diff --git a/recipes/sciencedaily.recipe b/recipes/sciencedaily.recipe deleted file mode 100644 index ee2dae291e..0000000000 --- a/recipes/sciencedaily.recipe +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__copyright__ = '2008-2017, Darko Miletic ' -''' -sciencedaily.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ScienceDaily(BasicNewsRecipe): - title = u'ScienceDaily' - __author__ = u'Darko Miletic' - title = 'ScienceDaily' - description = ('ScienceDaily is one of the Internet\'s most popular ' - 'science news web sites. Since starting in 1995, the ' - 'award-winning site has earned the loyalty of students, ' - 'researchers, healthcare professionals, government ' - 'agencies, educators and the general public around the ' - 'world. Now with more than 6 million monthly visitors ' - 'worldwide, ScienceDaily generates nearly 20 million ' - 'page views a month and is steadily growing in its ' - 'global audience.') - category = 'medicin, healthcare' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'en' - encoding = 'utf-8' - - # Feed are found here: https://www.sciencedaily.com/newsfeeds.htm - feeds = [ - ('Latest Science News', 'https://www.sciencedaily.com/rss/top.xml'), - ('All Top News', 'https://www.sciencedaily.com/rss/top/science.xml'), - ('Health News', 'https://www.sciencedaily.com/rss/top/health.xml'), - ('Technology News', 'https://www.sciencedaily.com/rss/top/technology.xml'), - ('Environment News', 'https://www.sciencedaily.com/rss/top/environment.xml'), - ('Society News', 'https://www.sciencedaily.com/rss/top/society.xml'), - ('Strange & Offbeat News', 'https://www.sciencedaily.com/rss/strange_offbeat.xml'), - ] diff --git a/recipes/seanhannity.recipe b/recipes/seanhannity.recipe deleted file mode 100644 index 01a5f45dcb..0000000000 --- a/recipes/seanhannity.recipe +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class SeanHannity(BasicNewsRecipe): - cover_url = 'http://www.hannity.com/images/misc_logo.gif' - title = u"Sean Hannity Show" - __author__ = 'Rob Lammert - rob.lammert[at]gmail.com' - description = u"Articles from Sean Hannity's website, www.hannity.com" - oldest_article = 7.0 - language = 'en' - max_articles_per_feed = 100 - recursions = 0 - encoding = 'utf8' - no_stylesheets = True - remove_javascript = True - - remove_tags = [ - dict(name='div', attrs={'id': [ - 'header', 'navsprite', 'topminibarad', 'headline_bar', 'shadow', 'footer']}), - dict(name='div', attrs={'class': 'rightcolumn'}), - dict(name='table', attrs={'id': 'audiobox'}), - dict(name='a', attrs={'title': ['Home', 'Shows', 'Guests', 'Photos']}), - dict(name='iframe') - ] - - feeds = [ - ('Content Feed', u'http://feeds.feedburner.com/TheSeanHannityShow-AllContent?format=xml') - ] - diff --git a/recipes/security_watch.recipe b/recipes/security_watch.recipe deleted file mode 100644 index 13c17f10b1..0000000000 --- a/recipes/security_watch.recipe +++ /dev/null @@ -1,19 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class SecurityWatch(BasicNewsRecipe): - title = u'securitywatch' - description = 'security news' - timefmt = ' [%d %b %Y]' - __author__ = 'Oliver Niesner' - no_stylesheets = True - oldest_article = 14 - max_articles_per_feed = 100 - use_embedded_content = False - language = 'en' - auto_cleanup = True - - feeds = [ - (u'securitywatch', - u'http://feeds.pcmag.com/Rss.aspx/SectionArticles?sectionId=28026') - ] diff --git a/recipes/serverside.recipe b/recipes/serverside.recipe deleted file mode 100644 index 1caa05e33d..0000000000 --- a/recipes/serverside.recipe +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Rick Kellogg' -''' -TheServerSide.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Engadget(BasicNewsRecipe): - title = u'TheServerSide.com' - __author__ = 'Rick Kellogg' - description = 'news' - language = 'en' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - - remove_tags = [dict(name='table', attrs={'class': ["head"]})] - - feeds = [(u'News', u'http://feeds.feedburner.com/techtarget/tsscom/home')] - - def get_article_url(self, article): - - url = article.get('guid', None) - - return url - - def print_version(self, url): - return url.replace('http://www.theserverside.com/news/thread.tss?thread_id=', 'http://www.theserverside.com/common/printthread.tss?thread_id=') diff --git a/recipes/sg_hu.recipe b/recipes/sg_hu.recipe deleted file mode 100644 index 68b027abb5..0000000000 --- a/recipes/sg_hu.recipe +++ /dev/null @@ -1,17 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class SGhu(BasicNewsRecipe): - title = u'SG.hu' - __author__ = 'davotibarna' - description = u'Informatika \xe9s Tudom\xe1ny' - language = 'hu' - oldest_article = 5 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - - feeds = [(u'SG.hu', u'http://www.sg.hu/plain/rss.xml')] - - def print_version(self, url): - return url.replace('cikkek/', 'printer.php?cid=') diff --git a/recipes/shacknews.recipe b/recipes/shacknews.recipe deleted file mode 100644 index acf6043496..0000000000 --- a/recipes/shacknews.recipe +++ /dev/null @@ -1,17 +0,0 @@ - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Shacknews(BasicNewsRecipe): - __author__ = 'Docbrown00' - __license__ = 'GPL v3' - title = u'Shacknews' - oldest_article = 7 - max_articles_per_feed = 100 - language = 'en' - no_stylesheets = True - auto_cleanup = True - - feeds = [ - (u'Latest News', u'http://www.shacknews.com/shackfeed.xml'), - ] diff --git a/recipes/shortlist.recipe b/recipes/shortlist.recipe deleted file mode 100644 index b8f7b9f772..0000000000 --- a/recipes/shortlist.recipe +++ /dev/null @@ -1,66 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1324663493(BasicNewsRecipe): - title = u'Shortlist' - description = 'Articles From Shortlist.com' - # I've set oldest article to 7 days as the website updates weekly - oldest_article = 8 - max_articles_per_feed = 20 - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - ignore_duplicate_articles = {'title'} - - __author__ = 'Dave Asbury' - # last updated 7/10/12 - language = 'en_GB' - - def get_cover_url(self): - soup = self.index_to_soup('http://www.shortlist.com') - cov = soup.find(attrs={'width': '121'}) - # print '******** ',cov,' ***' - # cover_url = 'http://www.shortlist.com'+cov['src'] - cover_url = cov['src'] - return cover_url - - masthead_url = 'http://www.mediauk.com/logos/100/344096.png' - - preprocess_regexps = [ - (re.compile(r'…or.*?email to your friends.', re.IGNORECASE | re.DOTALL), lambda match: '')] - - keep_only_tags = [ - # dict(name='h1'), - dict(name='h2', attrs={'class': 'title'}), - dict(name='h3', atts={'class': 'subheading'}), - dict(attrs={'class': ['hero-static', 'stand-first']}), - dict(attrs={'class': 'hero-image'}), - dict(name='div', attrs={ - 'id': ['list', 'article', 'article alternate']}), - dict(name='div', attrs={'class': 'stand-first'}), - ] - remove_tags = [dict(name='h2', attrs={'class': 'graphic-header'}), - dict(attrs={ - 'id': ['share', 'twitter', 'facebook', 'digg', 'delicious', 'facebook-like']}), - dict(atts={'class': [ - 'related-content', 'related-content-item', 'related-content horizontal', 'more']}), - - ] - - remove_tags_after = [dict(name='p', attrs={'id': 'tags'}) - ] - - feeds = [ - # edit http://feed43.com/feed.html?name=3156308700147005 - # repeatable pattern =

{_}{%}{*}

- - (u'This Weeks Issue', u'http://feed43.com/5205766657404804.xml'), - (u'Home Page', u'http://feed43.com/3156308700147005.xml'), - (u'Cool Stuff', u'http://feed43.com/1557051772026706.xml'), - (u'Style', u'http://feed43.com/4168836374571502.xml'), - (u'Entertainment', u'http://feed43.com/4578504030588024.xml'), - - - ] diff --git a/recipes/sigma_live.recipe b/recipes/sigma_live.recipe deleted file mode 100644 index d34c43c551..0000000000 --- a/recipes/sigma_live.recipe +++ /dev/null @@ -1,14 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class sigmalive(BasicNewsRecipe): - title = u'SigmaLive' - __author__ = 'Stelios' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - category = 'news, CY' - description = 'Cypriot News' - language = 'el' - encoding = 'utf8' - feeds = [(u'sigmalive', u'http://sigmalive.com/rss/latest')] diff --git a/recipes/sign_on_sd.recipe b/recipes/sign_on_sd.recipe deleted file mode 100644 index aefe92b71b..0000000000 --- a/recipes/sign_on_sd.recipe +++ /dev/null @@ -1,54 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1315899507(BasicNewsRecipe): - title = u'Sign On San Diego' - __author__ = 'Jay Kindle' - description = 'Local news stories from The San Diego Union-Tribune; breaking news, business and technology, local and national sports coverage, entertainment news and reviews.' # noqa - publisher = 'Tribune Company' - category = 'news, politics, USA, San Diego, California, world' - oldest_article = 2 - max_articles_per_feed = 200 - timefmt = ' [%b %d, %Y]' - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - auto_cleanup = True - remove_empty_feeds = True - publication_type = 'newspaper' - - feeds = [ - (u'Latest News', - u'http://www.sandiegouniontribune.com/latest/rss2.0.xml'), - (u'Business', - u'http://www.sandiegouniontribune.com/business/rss2.0.xml'), - (u'Politics', - u'http://www.sandiegouniontribune.com/news/politics/rss2.0.xml'), - (u'Immigration', - u'http://www.sandiegouniontribune.com/news/immigration/rss2.0.xml'), - (u'Courts', - u'http://www.sandiegouniontribune.com/news/public-safety/rss2.0.xml'), - (u'Education', - u'http://www.sandiegouniontribune.com/news/education/rss2.0.xml'), - (u'Sports', - u'http://www.sandiegouniontribune.com/sports/rss2.0.xml'), - (u'Chargers', - u'http://www.sandiegouniontribune.com/sports/chargers/rss2.0.xml'), - (u'Padres', - u'http://www.sandiegouniontribune.com/sports/padres/rss2.0.xml'), - (u'NFL', - u'http://www.sandiegouniontribune.com/sports/nfl/rss2.0.xml'), - (u'NBA', - u'http://www.sandiegouniontribune.com/sports/nba/rss2.0.xml'), - (u'Photos', - u'http://www.sandiegouniontribune.com/visuals/rss2.0.xml'), - (u'Entertainment', - u'http://www.sandiegouniontribune.com/entertainment/rss2.0.xml'), - (u'Books', - u'http://www.sandiegouniontribune.com/entertainment/books/rss2.0.xml'), - (u'Opinion', - u'http://www.sandiegouniontribune.com/opinion/rss2.0.xml'), - (u'Travel', - u'http://www.sandiegouniontribune.com/lifestyle/travel/rss2.0.xml'), - ] diff --git a/recipes/silicon_republic.recipe b/recipes/silicon_republic.recipe deleted file mode 100644 index 8623a18c35..0000000000 --- a/recipes/silicon_republic.recipe +++ /dev/null @@ -1,19 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011 Neil Grogan' -# -# Silicon Republic Recipe -# - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SiliconRepublic(BasicNewsRecipe): - title = u'Silicon Republic' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = u'Neil Grogan' - language = 'en_IE' - - remove_tags = [dict(attrs={'class': ['thumb', 'txt', 'compactbox', 'icons', 'catlist', 'catlistinner', 'taglist', 'taglistinner', 'social', 'also-in', 'also-in-inner', 'also-in-footer', 'zonek-dfp', 'paneladvert', 'rcadvert', 'panel', 'h2b']}), dict(id=['header', 'logo', 'header-right', 'sitesearch', 'rsslinks', 'topnav', 'topvideos', 'topvideos-list', 'topnews', 'topnews-list', 'slideshow', 'slides', 'compactheader', 'compactnews', 'compactfeatures', 'article-type', 'contactlinks-header', 'banner-zone-k-dfp', 'footer-related', 'directory-services', 'also-in-section', 'featuredrelated1', 'featuredrelated2', 'featuredrelated3', 'featuredrelated4', 'advert2-dfp']), dict(name=['script', 'style'])] # noqa - - feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')] diff --git a/recipes/singtao_daily.recipe b/recipes/singtao_daily.recipe deleted file mode 100644 index 3551a2799e..0000000000 --- a/recipes/singtao_daily.recipe +++ /dev/null @@ -1,79 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe1278063072(BasicNewsRecipe): - title = u'Singtao Daily - Canada' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'rty' - description = 'Toronto Canada Chinese Newspaper' - publisher = 'news.singtao.ca' - category = 'Chinese, News, Canada' - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - language = 'zh' - conversion_options = {'linearize_tables': True} - masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg' - extra_css = ''' - @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\ - - body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}\ - - h1 {font-family: 'DroidFont', serif;}\ - - .articledescription {font-family: 'DroidFont', serif;} - ''' - keep_only_tags = [ - dict(name='div', attrs={'id': ['title', 'storybody']}), - dict(name='div', attrs={'class': 'content'}) - ] - - def parse_index(self): - feeds = [] - for title, url in [ - ('Editorial', - 'http://news.singtao.ca/toronto/editorial.html'), - ('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'), - 'http://news.singtao.ca/toronto/city.html'), - ('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'), - 'http://news.singtao.ca/toronto/canada.html'), - ('Entertainment', - 'http://news.singtao.ca/toronto/entertainment.html'), - ('World', - 'http://news.singtao.ca/toronto/world.html'), - ('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'), - 'http://news.singtao.ca/toronto/finance.html'), - ('Sports', 'http://news.singtao.ca/toronto/sports.html'), - ]: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - return feeds - - def parse_section(self, url): - soup = self.index_to_soup(url) - div = soup.find( - attrs={'class': ['newslist paddingL10T10', 'newslist3 paddingL10T10']}) - current_articles = [] - for li in div.findAll('li'): - a = li.find('a', href=True) - if a is None: - continue - title = self.tag_to_string(a) - url = a.get('href', False) - if not url or not title: - continue - if url.startswith('/'): - url = 'http://news.singtao.ca' + url - current_articles.append( - {'title': title, 'url': url, 'description': ''}) - - return current_articles - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll(width=True): - del item['width'] - return soup diff --git a/recipes/siol.recipe b/recipes/siol.recipe deleted file mode 100644 index c7d964cd9a..0000000000 --- a/recipes/siol.recipe +++ /dev/null @@ -1,58 +0,0 @@ -# coding: utf-8 -__license__ = 'GPL v3' -__copyright__ = '2010, BlonG' -''' -www.siol.si -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Siol(BasicNewsRecipe): - title = u'Siol.net' - __author__ = u'BlonG' - description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos" - oldest_article = 3 - language = 'sl' - max_articles_per_feed = 20 - no_stylesheets = True - use_embedded_content = False - - cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg' - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' - - html2lrf_options = ['--base-font-size', '10'] - - keep_only_tags = [ - dict(name='div', attrs={'id': 'idContent'}), - ] - - remove_tags = [ - dict(name='span', attrs={'class': 'com1'}), - dict(name='div', attrs={'class': 'relation'}), - dict(name='p', attrs={'class': 'path'}), - dict(name='div', attrs={'class': 'clear_r'}), - dict(name='div', attrs={'id': 'appendix'}), - dict(name='div', attrs={'id': 'rail'}), - dict(name='div', attrs={'id': 'div_comments'}), - dict(name='div', attrs={'class': 'thumbs'}), - ] - - feeds = [ - - (u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija'), - (u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice'), - (u'EU', u'http://www.siol.net/rss.aspx?path=EU'), - (u'Svet', u'http://www.siol.net/rss.aspx?path=Svet'), - (u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo'), - (u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal'), - (u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi'), - (u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto'), - (u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija'), - (u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV') - ] diff --git a/recipes/sisainlive.recipe b/recipes/sisainlive.recipe deleted file mode 100644 index 63546cde63..0000000000 --- a/recipes/sisainlive.recipe +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -__copyright__ = '2015, Hoje Lee ' -''' -Profile to download SisaIN Live -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class SisaINLive(BasicNewsRecipe): - language = 'ko' - title = u'시사인 라이브' - description = u'시사인 라이브 기사' - __author__ = 'Hoje Lee' - oldest_article = 30 - max_articles_per_feed = 10 - auto_cleanup = True - """ - # manual cleanup - no_stylesheets = True - remove_javascript = True - - keep_only_tags = [ - dict(name='div', attrs ={'class':['View_Title']}), - dict(name='div', attrs ={'class':['View_Info']}), - dict(name='div', attrs ={'class':['View_Time']}), - dict(id='articleBody'), - ] - remove_tags = [ - dict(name='table', attrs ={'width':['320'], 'height':['265']}), - ] - """ - - feeds = [ - # (u'전체기사', 'http://www.sisainlive.com/rss.xml'), - (u'인기기사', 'http://www.sisainlive.com/rss/clickTop.xml'), - (u'커버스토리', 'http://www.sisainlive.com/rss/SRN121.xml'), - (u'특집', 'http://www.sisainlive.com/rss/SRN122.xml'), - (u'정치', 'http://www.sisainlive.com/rss/S1N15.xml'), - (u'경제', 'http://www.sisainlive.com/rss/S1N16.xml'), - (u'사회', 'http://www.sisainlive.com/rss/S1N17.xml'), - (u'문화', 'http://www.sisainlive.com/rss/S1N18.xml'), - (u'국제.한반도', 'http://www.sisainlive.com/rss/S1N4.xml'), - (u'실용.과학', 'http://www.sisainlive.com/rss/S1N6.xml'), - (u'휴먼&휴', 'http://www.sisainlive.com/rss/S1N19.xml'), - (u'인터뷰.오피니언', 'http://www.sisainlive.com/rss/S1N5.xml'), - (u'사진.만화', 'http://www.sisainlive.com/rss/S1N7.xml'), - (u'별책부록', 'http://www.sisainlive.com/rss/S1N14.xml'), - ] diff --git a/recipes/sizinti_derigisi.recipe b/recipes/sizinti_derigisi.recipe deleted file mode 100644 index ebf8e041dd..0000000000 --- a/recipes/sizinti_derigisi.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TodaysZaman_en(BasicNewsRecipe): - title = u'Sızıntı Dergisi' - __author__ = u'thomass' - description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features' # noqa - oldest_article = 30 - max_articles_per_feed = 80 - no_stylesheets = True - encoding = 'utf-8' - category = 'dergi, ilim, kültür, bilim,Türkçe' - language = 'tr' - publication_type = 'magazine' - - cover_img_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg' - masthead_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg' - remove_tags_before = dict(id='content-right') - - feeds = [ - (u'Sızıntı', u'http://www.sizinti.com.tr/rss'), - ] - - # def preprocess_html(self, soup): - # return self.adeify_images(soup) - # def print_version(self, url): #there is a problem caused by table format - # return - # url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', - # 'http://www.todayszaman.com/newsDetail_openPrintPage.action?') diff --git a/recipes/skanderborglokalavisen_dk.recipe b/recipes/skanderborglokalavisen_dk.recipe deleted file mode 100644 index 832edb1cd7..0000000000 --- a/recipes/skanderborglokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Skanderborg -''' - - -class SkanderborgLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Skanderborg' - description = 'Lokale og regionale nyheder, sport, kultur fra Skanderborg og omegn på skanderborg.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/skylife.recipe b/recipes/skylife.recipe deleted file mode 100644 index 344363cea4..0000000000 --- a/recipes/skylife.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class THY (BasicNewsRecipe): - - title = u'Skylife' - __author__ = u'thomass' - description = ' Türk Hava Yollarının yayınladığı aylık kültür dergisi (Fotoğrafları da içermesini isterseniz keep_only_tag''da belirttiğim kodu da ekleyin) ' # noqa - oldest_article = 32 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - publisher = 'thomass' - category = 'genel kültür, gezi,Türkçe' - language = 'tr' - publication_type = 'magazine' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - # Fotoğrafları da eklemek için: dict(name='div', - # attrs={'id':['divResimler']}) - keep_only_tags = [dict(name='h3', attrs={'id': ['hpbaslik']}), dict( - name='p', attrs={'id': ['pyayin', 'hspot', 'picerik']})] - masthead_url = 'http://www.turkishairlines.com/static/img/skylife/logo.png' - remove_empty_feeds = True - remove_attributes = ['width', 'height'] - - feeds = [(u'SKYLIFE', u'http://feed43.com/7783278414103376.xml')] diff --git a/recipes/slate_star_codex.recipe b/recipes/slate_star_codex.recipe deleted file mode 100644 index 6f0bde61da..0000000000 --- a/recipes/slate_star_codex.recipe +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - -__license__ = 'GPL v3' -__copyright__ = '2015, Ned Letcher ' - - -""" -calibre recipe for Slate Star Codex. -""" - - -class SlateStarCodex(BasicNewsRecipe): - title = u'Slate Star Codex' - description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING' - __author__ = 'Ned Letcher' - max_articles_per_feed = 20 - language = 'en' - encoding = 'utf-8' - no_stylesheets = True - # alternative candidate for keep_only_tags: - # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}] - keep_only_tags = [ - dict(name='div', attrs={'class': re.compile(r'\bpost\b')})] - remove_tags = [ - dict(name='div', attrs={'class': re.compile(r'\bsharedaddy\b')})] - - def get_archived_posts(self): - soup = self.index_to_soup('http://slatestarcodex.com/archives/') - entries = soup.findAll(attrs={'class': 'sya_postcontent'}) - - posts = [] - for entry in entries: - atag = entry.find('a') - url = atag['href'] - post = { - 'title': atag.contents[0], - 'url': url, - 'date': "-".join(url.strip('/').split('/')[-4:-1]), - } - posts.append(post) - return posts - - def parse_index(self): - posts = self.get_archived_posts() - return [[self.title, posts]] diff --git a/recipes/slovo.recipe b/recipes/slovo.recipe deleted file mode 100644 index b1c42b2459..0000000000 --- a/recipes/slovo.recipe +++ /dev/null @@ -1,41 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SlovoRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'Abelturd' - language = 'sk' - version = 1 - - title = u'SLOVO' - publisher = u'' - category = u'News, Newspaper' - description = u'Politicko-spolo\u010densk\xfd t\xfd\u017edenn\xedk' - encoding = 'Windows-1250' - - oldest_article = 1 - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - - no_stylesheets = True - remove_javascript = True - - feeds = [] - feeds.append((u'V\u0161etky \u010dl\xe1nky', - u'http://www.noveslovo.sk/rss.asp')) - - keep_only_tags = [] - remove_tags = [] - - preprocess_regexps = [ - (re.compile(r'', re.DOTALL | re.IGNORECASE), - lambda match: ''), - ] - - def print_version(self, url): - m = re.search('(?<=id=)[0-9]*', url) - - return u'http://www.noveslovo.sk/clanoktlac.asp?id=' + str(m.group(0)) diff --git a/recipes/sme.recipe b/recipes/sme.recipe deleted file mode 100644 index bd636b0120..0000000000 --- a/recipes/sme.recipe +++ /dev/null @@ -1,79 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SmeRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'sk' - version = 1 - - title = u'SME' - publisher = u'' - category = u'News, Newspaper' - description = u'News from Slovakia' - - oldest_article = 1 - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - - no_stylesheets = True - remove_javascript = True - - # Feeds from: http://rss.sme.sk/ - feeds = [] - feeds.append( - (u'Tituln\u00E1 strana', u'http://rss.sme.sk/rss/rss.asp?id=frontpage')) - feeds.append((u'Naj\u010D\u00EDtanej\u0161ie za 4 hodiny', - u'http://rss.sme.sk/rss/rss.asp?id=smenajcit4')) - feeds.append((u'Naj\u010D\u00EDtanej\u0161ie za 24 hod\u00EDn', - u'http://rss.sme.sk/rss/rss.asp?id=smenajcit24')) - feeds.append( - (u'Z domova', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zdom')) - feeds.append((u'Zahrani\u010Die', - u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zahr')) - feeds.append((u'Z domova + zahrani\u010Die', - u'http://rss.sme.sk/rss/rss.asp?sek=smeonline')) - feeds.append((u'Ekonomika', u'http://rss.sme.sk/rss/rss.asp?sek=ekon')) - feeds.append((u'Kult\u00FAra', u'http://rss.sme.sk/rss/rss.asp?sek=kult')) - feeds.append( - (u'Koment\u00E1re', u'http://rss.sme.sk/rss/rss.asp?sek=koment')) - feeds.append((u'Volby', u'http://rss.sme.sk/rss/rss.asp?sek=eVolby')) - feeds.append((u'\u0160port', u'http://rss.sme.sk/rss/rss.asp?sek=sport')) - feeds.append((u'Futbal', u'http://rss.sme.sk/rss/rss.asp?sek=futbal')) - feeds.append((u'Hokej', u'http://rss.sme.sk/rss/rss.asp?sek=hokej')) - feeds.append((u'Po\u010D\u00EDta\u010De', - u'http://rss.sme.sk/rss/rss.asp?sek=pocit')) - feeds.append((u'Mobil', u'http://rss.sme.sk/rss/rss.asp?sek=mobil')) - feeds.append((u'Veda', u'http://rss.sme.sk/rss/rss.asp?sek=veda')) - feeds.append((u'Natankuj', u'http://rss.sme.sk/rss/rss.asp?sek=natankuj')) - feeds.append((u'Auto', u'http://rss.sme.sk/rss/rss.asp?sek=auto')) - feeds.append( - (u'Dom\u00E1cnos\u0165', u'http://rss.sme.sk/rss/rss.asp?sek=domac')) - feeds.append((u'\u017Dena', u'http://rss.sme.sk/rss/rss.asp?sek=zena')) - feeds.append((u'Z\u00E1bava', u'http://rss.sme.sk/rss/rss.asp?sek=zabava')) - feeds.append((u'Hry', u'http://rss.sme.sk/rss/rss.asp?sek=hry')) - - keep_only_tags = [] - keep_only_tags.append(dict(name='div', attrs={'id': 'contenth'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'articlec col'})) - - remove_tags = [] - remove_tags.append( - dict(name='div', attrs={'id': re.compile('smeplayer.*')})) - - remove_tags_after = [dict(name='p', attrs={'class': 'autor_line'})] - - extra_css = ''' - @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)} - body {font-family: sans1, serif1;} - ''' - - def print_version(self, url): - parts = url.split('/') - id = parts[4] - - return u'http://korzar.sme.sk/clanok_tlac.asp?cl=' + str(id) diff --git a/recipes/smilezilla.recipe b/recipes/smilezilla.recipe deleted file mode 100644 index f483715515..0000000000 --- a/recipes/smilezilla.recipe +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import os -import re - -from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.web.feeds.news import BasicNewsRecipe - - -class SmileZilla(BasicNewsRecipe): - - title = 'SmileZilla' - language = 'en' - __author__ = "Will" - JOKES_INDEX = 'http://www.smilezilla.com/joke.do' - STORIES_INDEX = 'http://www.smilezilla.com/story.do' - description = 'Daily Jokes and funny stoires' - oldest_article = 1 - no_stylesheets = True - encoding = 'utf-8' - - remove_tags = [dict(name='table')] - - def _get_entry(self, soup): - return soup.find('form', attrs={'name': 'contentForm'}) - - def _get_section_title(self, soup): - title_div = soup.find('div', attrs={'class': 'title'}) - return self.tag_to_string(title_div).strip() - - def parse_index(self): - self.tdir = PersistentTemporaryDirectory() - - def as_soup(url): - soup = self.index_to_soup(url) - for img in soup.findAll('img', src=True): - if img['src'].startswith('/'): - img['src'] = 'http://www.smilezilla.com' + img['src'] - return soup - - articles = [] - - soup = as_soup(self.JOKES_INDEX) - jokes_entry = self._get_entry(soup) - section_title = self._get_section_title(soup) - todays_jokes = [] - for i, text in enumerate(re.findall(r'(.+?)') - f.write(text.encode('utf-8')) - todays_jokes.append({'title': title, 'url': 'file:///' + f.name}) - articles.append((section_title, todays_jokes)) - - soup = as_soup(self.STORIES_INDEX) - entry = self._get_entry(soup) - section_title = self._get_section_title(soup) - - todays_stories = [] - for i, text in enumerate(re.findall(r'(.+?)') - f.write(text.encode('utf-8')) - todays_stories.append({'title': title, 'url': 'file:///' + f.name}) - articles.append((section_title, todays_stories)) - - return articles diff --git a/recipes/sn_dk.recipe b/recipes/sn_dk.recipe deleted file mode 100644 index e8ac9a301e..0000000000 --- a/recipes/sn_dk.recipe +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -sn.dk -''' - - -class Sn_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'sn.dk' - description = 'Sjællandske medier' - category = 'newspaper, news, localnews, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Nyheder', 'http://sn.dk/rss'), - - ] - diff --git a/recipes/snopes.recipe b/recipes/snopes.recipe deleted file mode 100644 index fe16b534a0..0000000000 --- a/recipes/snopes.recipe +++ /dev/null @@ -1,43 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Starson17' -''' -snopes.com -''' -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class Snopes(BasicNewsRecipe): - title = 'Snopes' - __author__ = 'Starson17' - description = 'Urban Legends' - oldest_article = 21 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publisher = 'Snopes' - category = 'news, ' - language = 'en' - publication_type = 'newsportal' - remove_javascript = True - no_stylesheets = True - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - - keep_only_tags = [ - dict(name='h1'), - dict(name='div', attrs={'class': ['article_text']}), - ] - - feeds = [ - ('Snopes', 'http://www.snopes.com/info/whatsnew.xml'), - ] - - extra_css = ''' - h1{font-family:Trebuchet MS,Bookman Old Style,Arial;color:#75b570} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Arial,Helvetica,sans-serif;font-size:small;} - ''' diff --git a/recipes/socialdiva.recipe b/recipes/socialdiva.recipe deleted file mode 100644 index 9ad6997361..0000000000 --- a/recipes/socialdiva.recipe +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011' -''' -socialdiva.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SocialDiva(BasicNewsRecipe): - title = u'Social Diva' - __author__ = u'Silviu Cotoara' - description = u'When in doubt, wear red' - publisher = 'Social Diva' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Femei' - encoding = 'utf-8' - cover_url = 'http://www.socialdiva.ro/images/logo.png' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'col-alpha mt5 content_articol'}), - dict(name='div', attrs={'class': 'mt5'}) - ] - - remove_tags = [ - dict(name='a', attrs={'class': ['comments float-left scroll mt5']}), - dict(name='a', attrs={'class': ['comments float-left scroll']}), - dict(name='div', attrs={ - 'class': ['rating-container relative float-left']}), - dict(name='div', attrs={'class': ['float-right social_articol']}) - ] - - remove_tags_after = [ - dict(name='a', attrs={'class': ['comments float-left scroll mt5']}) - ] - - feeds = [ - (u'Feeds', u'http://www.socialdiva.ro/rss.html') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/soenderborglokalavisen_dk.recipe b/recipes/soenderborglokalavisen_dk.recipe deleted file mode 100644 index 42507b262b..0000000000 --- a/recipes/soenderborglokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Sønderborg -''' - - -class SoenderborgLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Sønderborg' - description = 'Lokale og regionale nyheder, sport, kultur fra Sønderborg og omegn på soenderborg.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/soldiers.recipe b/recipes/soldiers.recipe deleted file mode 100644 index d5e9f526c2..0000000000 --- a/recipes/soldiers.recipe +++ /dev/null @@ -1,42 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.army.mil/soldiers/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Soldiers(BasicNewsRecipe): - title = 'Soldiers' - __author__ = 'Darko Miletic' - description = 'The Official U.S. Army Magazine' - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - auto_cleanup = True - auto_cleanup_keep = '//div[@id="mediaWrapper"]' - simultaneous_downloads = 1 - delay = 4 - max_connections = 1 - encoding = 'utf-8' - publisher = 'U.S. Army' - category = 'news, politics, war, weapons' - language = 'en' - INDEX = 'http://www.army.mil/soldiers/' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/')] - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('img', attrs={'alt': 'Current Magazine Cover'}) - if cover_item: - cover_url = cover_item['src'] - return cover_url diff --git a/recipes/something_awful.recipe b/recipes/something_awful.recipe deleted file mode 100644 index cb585261d4..0000000000 --- a/recipes/something_awful.recipe +++ /dev/null @@ -1,89 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class SomethingAwfulRecipe(BasicNewsRecipe): - title = 'Something Awful' - __author__ = 'atordo' - description = 'The Internet Makes You Stupid' - cover_url = 'http://i.somethingawful.com/core/head-logo-bluegren.png' - oldest_article = 15 - max_articles_per_feed = 50 - auto_cleanup = False - no_stylesheets = True - remove_javascript = True - language = 'en' - use_embedded_content = False - remove_empty_feeds = True - publication_type = 'magazine' - reverse_article_order = True - - recursions = 1 - match_regexps = [r'^http://www.somethingawful.com/.+/.+/\d{1,2}/$'] - - remove_attributes = ['align', 'alt', 'valign'] - - keep_only_tags = [ - dict(name='div', attrs={'class': 'article_head'}), dict(name='div', attrs={ - 'class': 'organ article'}), dict(name='ul', attrs={'class': 'pager'}) - ] - - extra_css = ''' - .author{font-size:small} .date{font-size:small} - .byline{font-size:small} .font_big{font-size:large} - .compat5{font-weight:bold} .accentbox{background-color:#E3E3E3; border:solid black} - img{margin-bottom:0.4em; display:block; margin-left: auto; margin-right:auto} - ''' - - feeds = [ - - ('Photoshop Phriday', 'http://www.somethingawful.com/rss/photoshop-phriday.rss.xml'), - ('Comedy Goldmine', 'http://www.somethingawful.com/rss/comedy-goldmine.rss.xml'), - # ('The Flash Tub', 'http://www.somethingawful.com/rss/flash-tub.rss.xml') - # ('Downloads', 'http://www.somethingawful.com/rss/downloads.rss.xml') - # ('AwfulVision', 'http://www.somethingawful.com/rss/awfulvision.rss.xml') - ('Awful Link of the Day', 'http://www.somethingawful.com/rss/awful-links.rss.xml'), - ('Fake Something Awfuls', 'http://www.somethingawful.com/rss/fake-something-awful.rss.xml'), - ('The Barbarian\'s Dojo', 'http://www.somethingawful.com/rss/steve-sumner.rss.xml'), - ('The Great Goon Database', 'http://www.somethingawful.com/rss/great-goon-database.rss.xml'), - ('Livejournal Theater', 'http://www.somethingawful.com/rss/livejournal-theater.rss.xml'), - ('Joystick Token Healthpack', 'http://www.somethingawful.com/rss/token-healthpack.rss.xml'), - ('Webcam Ward', 'http://www.somethingawful.com/rss/webcam-ward.rss.xml'), - ('Features / Articles', 'http://www.somethingawful.com/rss/feature-articles.rss.xml'), - ('Guides', 'http://www.somethingawful.com/rss/guides.rss.xml'), - ('Legal Threats', 'http://www.somethingawful.com/rss/legal-threats.rss.xml'), - ('Pranks [ICQ]', 'http://www.somethingawful.com/rss/icq-pranks.rss.xml'), - ('State Og', 'http://www.somethingawful.com/rss/state-og.rss.xml'), - ('Everquest', 'http://www.somethingawful.com/rss/everquest.rss.xml'), - ('Pranks [Email]', 'http://www.somethingawful.com/rss/email-pranks.rss.xml'), - ('The Weekend Web', 'http://www.somethingawful.com/rss/weekend-web.rss.xml'), - ('Daily Dirt', 'http://www.somethingawful.com/rss/daily-dirt.rss.xml'), - ('The Art of Warcraft', 'http://www.somethingawful.com/rss/art-of-warcraft.rss.xml'), - ('Video Game Article', 'http://www.somethingawful.com/rss/video-game-article.rss.xml'), - ('The Awful Movie Database', 'http://www.somethingawful.com/rss/awful-movie-database.rss.xml'), - ('Pregame Wrapup', 'http://www.somethingawful.com/rss/pregame-wrapup.rss.xml'), - ('Second Life Safari', 'http://www.somethingawful.com/rss/second-life-safari.rss.xml'), - ('The Hogosphere', 'http://www.somethingawful.com/rss/hogosphere.rss.xml'), - ('Front Page News', 'http://www.somethingawful.com/rss/news.rss.xml'), - ('Forum Friday\'s Monday', 'http://www.somethingawful.com/rss/forum-fridays.rss.xml'), - ('Cliff Yablonski Hates You', 'http://www.somethingawful.com/rss/cliff-yablonski.rss.xml'), - ('Manifestos From the Internet', 'http://www.somethingawful.com/rss/manifestos-from-internet.rss.xml'), - ('Johnston Checks In', 'http://www.somethingawful.com/rss/levi-johnston.rss.xml'), - ('Twitter Tuesday', 'http://www.somethingawful.com/rss/twitter-tuesday.rss.xml'), - ('Music Article', 'http://www.somethingawful.com/rss/music-article.rss.xml'), - ('The Everdraed Showcase', 'http://www.somethingawful.com/rss/everdraed-showcase.xml'), - ('Reviews [Games]', 'http://www.somethingawful.com/rss/game-reviews.rss.xml'), - ('Reviews [Movies]', 'http://www.somethingawful.com/rss/movie-reviews.rss.xml'), - ('Rom Pit', 'http://www.somethingawful.com/rss/rom-pit.rss.xml'), - ('Truth Media [Reviews]', 'http://www.somethingawful.com/rss/truth-media-reviews.rss.xml'), - ('Truth Media [Flames]', 'http://www.somethingawful.com/rss/truth-media-flames.rss.xml'), - ('Awful Anime', 'http://www.somethingawful.com/rss/hentai-game-reviews.rss.xml'), - ('The Horrors of Pornography', 'http://www.somethingawful.com/rss/horrors-of-porn.rss.xml'), - ('Your Band Sucks', 'http://www.somethingawful.com/rss/your-band-sucks.rss.xml'), - ('Fashion SWAT', 'http://www.somethingawful.com/rss/fashion-swat.rss.xml'), - ('MMO Roulette', 'http://www.somethingawful.com/rss/mmo-roulette.rss.xml'), - ('The Most Awful', 'http://www.somethingawful.com/rss/most-awful.rss.xml'), - ('Garbage Day', 'http://www.somethingawful.com/rss/garbage-day.rss.xml'), - ('WTF, D&D!?', 'http://www.somethingawful.com/rss/dungeons-and-dragons.rss.xml'), - ('Current Releases', 'http://www.somethingawful.com/rss/current-movie-reviews.rss.xml'), - ('Awful Things for Sale', 'http://www.somethingawful.com/rss/awful-things-sale.xml') - ] diff --git a/recipes/sondagsavisen_dk.recipe b/recipes/sondagsavisen_dk.recipe deleted file mode 100644 index 4b26f3eebc..0000000000 --- a/recipes/sondagsavisen_dk.recipe +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Søndagsavisen.dk -''' - - -class Sondagsavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Søndagsavisen.dk' - description = 'Danmarks mest læste avis' - category = 'newspaper, news, localnews, home, health, decoration, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - # Feed are found here: http://www.sondagsavisen.dk/ - feeds = [ - ('Søndagsavisen.dk', 'http://www.sondagsavisen.dk/feed'), - ] diff --git a/recipes/southernstar.recipe b/recipes/southernstar.recipe deleted file mode 100644 index dda6948750..0000000000 --- a/recipes/southernstar.recipe +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2012, watou' -''' -southernstar.ie -''' -import codecs -import os -import re -import tempfile - -from calibre.ebooks.BeautifulSoup import NavigableString, Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class TheSouthernStar(BasicNewsRecipe): - - title = 'The Southern Star' - __author__ = 'watou' - description = 'West Cork\'s leading news and information provider since 1889' - NEWS_INDEX = 'http://www.southernstar.ie/news.php' - LOCAL_NOTES = 'http://www.southernstar.ie/localnotes.php' - SPORT_INDEX = 'http://www.southernstar.ie/sport.php' - CLASSIFIEDS = 'http://www.southernstar.ie/classifieds.php' - language = 'en_IE' - encoding = 'cp1252' - - publication_type = 'newspaper' - masthead_url = 'http://www.southernstar.ie/images/logo.gif' - remove_tags_before = dict(name='div', attrs={'class': 'article'}) - remove_tags_after = dict(name='div', attrs={'class': 'article'}) - remove_tags = [dict(name='div', attrs={'style': 'width:300px; position:relative'}), - dict(name='form'), - dict(name='div', attrs={'class': 'endpanel'})] - no_stylesheets = True - tempfiles = [] - pubdate = '' - - preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] - - def parse_index(self): - feeds = [] - seen_titles = set() - - articles = self.fetch_ss_articles(self.NEWS_INDEX, seen_titles) - if articles: - feeds.append(('News', articles)) - - articles = self.fetch_ss_notes(self.LOCAL_NOTES) - if articles: - feeds.append(('Local Notes', articles)) - - articles = self.fetch_ss_articles(self.SPORT_INDEX, seen_titles) - if articles: - feeds.append(('Sport', articles)) - - articles = self.fetch_ss_notes(self.CLASSIFIEDS) - if articles: - feeds.append(('Classifieds', articles)) - - return feeds - - def fetch_ss_articles(self, index, seen_titles): - articles = [] - soup = self.index_to_soup(index) - ts = soup.find('div', {'class': 'article'}) - ds = self.tag_to_string(ts.find('strong')) - self.pubdate = ' [' + ds + ']' - self.timefmt = ' [%s]' % ds - - for post in ts.findAll('h1'): - a = post.find('a', href=True) - title = self.tag_to_string(a) - if title in seen_titles: - continue - seen_titles.add(title) - url = a['href'] - if url.startswith('article'): - url = 'http://www.southernstar.ie/' + url - self.log('\tFound article:', title, 'at', url) - p = post.findNextSibling('p') - desc = None - if p is not None: - desc = str(p) - articles.append({'title': title, 'url': url, 'description': desc, - 'date': self.pubdate}) - - return articles - - def fetch_ss_notes(self, page): - articles = [] - - soup = self.index_to_soup(page) - ts = soup.find('div', {'class': 'content'}) - for post in ts.findAll('h1'): - title = self.tag_to_string(post) - self.log('\tFound note:', title) - f = tempfile.NamedTemporaryFile(suffix='.html', delete=False) - f.close() - f = codecs.open(f.name, 'w+b', self.encoding, 'replace') - url = "file://" + f.name - f.write(u'

' + title + '

') - f.write(str(post.findNextSibling('p'))) - f.write(u'') - self.log('\tWrote note to', f.name) - f.close() - self.tempfiles.append(f) - articles.append({'title': title, 'url': url, 'date': self.pubdate}) - - return articles - - def postprocess_html(self, soup, first): - for table in soup.findAll('table', align='right'): - img = table.find('img') - if img is not None: - img.extract() - caption = self.tag_to_string(table).strip() - div = new_tag(soup, 'div') - div['style'] = 'text-align:center' - div.insert(0, img) - div.insert(1, new_tag(soup, 'br')) - if caption: - div.insert(2, NavigableString(caption)) - table.replaceWith(div) - - return soup - - def image_url_processor(self, baseurl, url): - return url.replace(' ', '%20') - - def cleanup(self): - self.log('cleaning up') - for f in self.tempfiles: - os.unlink(f.name) - self.tempfiles = [] diff --git a/recipes/spin_magazine.recipe b/recipes/spin_magazine.recipe deleted file mode 100644 index 8ff4371cc1..0000000000 --- a/recipes/spin_magazine.recipe +++ /dev/null @@ -1,18 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1296179411(BasicNewsRecipe): - title = u'SPIN Magzine' - __author__ = 'Quistopher' - language = 'en' - oldest_article = 7 - max_articles_per_feed = 100 - - feeds = [ - (u'Daily Noise Blog | SPIN.com', u'http://www.spin.com/blog/feed'), - (u'It Happened Last Night | SPIN.com', - u'http://www.spin.com/it-happened-last-night/feed'), - (u'Album Reviews | SPIN.com', - u'http://www.spin.com/album-reviews/feed') - - ] diff --git a/recipes/sportowefakty.recipe b/recipes/sportowefakty.recipe deleted file mode 100644 index fe613235a6..0000000000 --- a/recipes/sportowefakty.recipe +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class sportowefakty(BasicNewsRecipe): - title = u'SportoweFakty' - __author__ = 'Artur Stachecki , Tomasz Długosz ' - language = 'pl' - description = u'Najważniejsze informacje sportowe z kraju i ze świata, relacje, komentarze, wywiady, zdjęcia!' - oldest_article = 1 - masthead_url = 'http://www.sportowefakty.pl/images/logo.png' - max_articles_per_feed = 100 - simultaneous_downloads = 5 - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - ignore_duplicate_articles = {'title', 'url'} - - keep_only_tags = [dict(attrs={'class': 'box-article'})] - remove_tags = [] - remove_tags.append(dict(attrs={'class': re.compile(r'^newsStream')})) - remove_tags.append(dict(attrs={'target': '_blank'})) - - feeds = [ - (u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'), - (u'Koszykówka', u'http://www.sportowefakty.pl/koszykowka/index.rss'), - (u'Żużel', u'http://www.sportowefakty.pl/zuzel/index.rss'), - (u'Siatkówka', u'http://www.sportowefakty.pl/siatkowka/index.rss'), - (u'Zimowe', u'http://www.sportowefakty.pl/zimowe/index.rss'), - (u'Hokej', u'http://www.sportowefakty.pl/hokej/index.rss'), - (u'Moto', u'http://www.sportowefakty.pl/moto/index.rss'), - (u'Tenis', u'http://www.sportowefakty.pl/tenis/index.rss') - ] - - def get_article_url(self, article): - link = article.get('link', None) - if 'utm_source' in link: - return link.split('?utm')[0] - else: - return link - - def print_version(self, url): - print_url = url + '/drukuj' - return print_url - - def preprocess_html(self, soup): - head = soup.find('h1') - if 'Fotorelacja' in self.tag_to_string(head): - return None - else: - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup diff --git a/recipes/sporza_be.recipe b/recipes/sporza_be.recipe deleted file mode 100644 index ae01107aba..0000000000 --- a/recipes/sporza_be.recipe +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Sporzabe(BasicNewsRecipe): - title = u'Sporza.be' - __author__ = u'erkfuizfeuadjfjzefzfuzeff' - description = u'Sport news from Belgium in Dutch' - oldest_article = 7 - language = 'nl_BE' - - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - - keep_only_tags = [ - dict(name='title'), dict(name='div', attrs={'id': 'intro'}), dict(name='h3'), - dict(name='h1'), dict(name='span', attrs={'class': 'media_holder'}), - dict(name='div', attrs={'class': 'divider image'}), - dict(name='div', attrs={'class': 'paragraph'}) - ] - - remove_tags = [] - - feeds = [ - ( - u'Voetbal', u'http://sporza.be/cm/sporza/voetbal?mode=atom&action=submit' - ), - ( - u'Wielrennen', - u'http://sporza.be/cm/sporza/wielrennen?mode=atom&action=submit' - ), - ( - u'Tennis', u'http://sporza.be/cm/sporza/tennis?mode=atom&action=submit' - ), - ( - u'Auto en Motor', - u'http://sporza.be/cm/sporza/auto_motor?mode=atom&action=submit' - ), - ( - u'Atletiek', - u'http://sporza.be/cm/sporza/atletiek?mode=atom&action=submit' - ), - ( - u'Zaal', u'http://sporza.be/cm/sporza/zaal?mode=atom&action=submit' - ), - ( - u'Ander nieuws', - u'http://sporza.be/cm/sporza/ander_nieuws?mode=atom&action=submit' - ) - ] diff --git a/recipes/rian_eng.recipe b/recipes/sputnik.recipe similarity index 57% rename from recipes/rian_eng.recipe rename to recipes/sputnik.recipe index ac40504ee6..f11c48da5c 100644 --- a/recipes/rian_eng.recipe +++ b/recipes/sputnik.recipe @@ -1,4 +1,4 @@ - +#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic ' ''' @@ -9,28 +9,21 @@ from calibre.web.feeds.news import BasicNewsRecipe class Ria_eng(BasicNewsRecipe): - title = 'Ria Novosti' + title = 'Sputnik News' __author__ = 'Darko Miletic' description = 'News from Russia in English' language = 'en_RU' - publisher = 'en.rian.ru' category = 'news, politics, Russia' oldest_article = 3 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'utf-8' + auto_cleanup = True conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } - keep_only_tags = [dict(name='div', attrs={'class': 'article'})] - remove_tags = [ - dict(name=['object', 'link', 'iframe', 'base']), dict(name='div', attrs={'class': [ - 'related', 'mmban', 'view-story']}), dict(name='span', attrs={'class': 'copyright'}) - ] - remove_tags_after = dict(name='div', attrs={'class': 'text'}) - - feeds = [(u'Online news', u'http://en.rian.ru/export/rss2/archive/index.xml')] + feeds = [(u'News', u'https://sputnikglobe.com/export/rss2/archive/index.xml')] diff --git a/recipes/stamgasten.recipe b/recipes/stamgasten.recipe deleted file mode 100644 index c101bf5ad8..0000000000 --- a/recipes/stamgasten.recipe +++ /dev/null @@ -1,20 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1347706704(BasicNewsRecipe): - title = u'Stamgasten' - __author__ = u'DrMerry' - description = u'Stamgasten de populaire strip van Toon van Driel (http://www.toonvandriel.nl)' - language = u'nl' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = False - cover_url = 'http://shop.toonvandriel.nl/productimg.php?type=canvas&id=15&size=large' - no_stylesheets = True - remove_javascript = True - remove_empty_feeds = True - remove_tags_before = dict(id='title') - remove_tags_after = dict(attrs={'class': 'entry-content rich-content'}) - extra_css = 'img{border:0;padding:0;margin:0;width:100%}' - - feeds = [(u'Stamgasten', u'http://toonvandriel.nl/feed/')] diff --git a/recipes/standardmoney.recipe b/recipes/standardmoney.recipe deleted file mode 100644 index f28ee28ca8..0000000000 --- a/recipes/standardmoney.recipe +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -standard.money.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class StandardMoneyRo(BasicNewsRecipe): - title = 'Standard Money Ro' - __author__ = u'Silviu Cotoar\u0103' - publisher = 'Standard Money' - description = 'Portal de Business' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri,Romania' - encoding = 'utf-8' - cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='h1', attrs={'class': 'post-title'} - ), dict(name='div', attrs={'class': 'content_post'}) - ] - - feeds = [ - (u'Actualitate', u'http://standard.money.ro/feed') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/stars_and_stripes.recipe b/recipes/stars_and_stripes.recipe deleted file mode 100644 index a587b47d24..0000000000 --- a/recipes/stars_and_stripes.recipe +++ /dev/null @@ -1,35 +0,0 @@ -''' Stars and Stripes - ''' - - -import re - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe1308791026(BasicNewsRecipe): - title = u'Stars and Stripes' - oldest_article = 3 - max_articles_per_feed = 100 - __author__ = 'adoucette' - description = 'The U.S. militarys independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East.' - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publisher = 'stripes.com' - category = 'news, US, world' - language = 'en' - publication_type = 'newsportal' - preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - keep_only_tags = [dict(name='div', attrs={'class': ['element article']})] - remove_tags_after = [dict(name='ul', attrs={'class': 'inline-bookmarks'})] - feeds = [ - (u'News', u'http://feeds.stripes.com/starsandstripes/news'), - (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'), - (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'), - (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'), - (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel') - ] diff --git a/recipes/starwars.recipe b/recipes/starwars.recipe deleted file mode 100644 index 56ccaf267b..0000000000 --- a/recipes/starwars.recipe +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheForce(BasicNewsRecipe): - title = u'The Force' - language = 'en' - __author__ = 'Krittika Goyal' - oldest_article = 1 # days - max_articles_per_feed = 25 - encoding = 'cp1252' - - remove_stylesheets = True - conversion_options = {'linearize_tables': True} - remove_tags_after = dict(name='div', attrs={'class': 'KonaBody'}) - keep_only_tags = dict( - name='td', attrs={'background': '/images/span/tile_story_bgtile.gif'}) - remove_tags = [ - dict(name='iframe'), - ] - - feeds = [ - ('The Force', - 'http://www.theforce.net/outnews/tfnrdf.xml'), - ] - - def preprocess_html(self, soup): - for tag in soup.findAll(name='i'): - if 'Remember to join the Star Wars Insider Facebook' in self.tag_to_string(tag): - for x in tag.findAllNext(): - x.extract() - tag.extract() - break - tag = soup.find(attrs={'class': 'articleoption'}) - if tag is not None: - tag = tag.findParent('table') - if tag is not None: - for x in tag.findAllNext(): - x.extract() - tag.extract() - - for img in soup.findAll('img', src=True): - a = img.findParent('a', href=True) - if a is None: - continue - url = a.get('href').split('?')[-1].partition('=')[-1] - if url: - img.extract() - a.name = 'img' - a['src'] = url - del a['href'] - img['src'] = url - return soup diff --git a/recipes/stnn.recipe b/recipes/stnn.recipe deleted file mode 100644 index 7749553441..0000000000 --- a/recipes/stnn.recipe +++ /dev/null @@ -1,54 +0,0 @@ - - -__license__ = 'GPL v3' -__copyright__ = '2010, Larry Chan ' -''' -Singtao STNN -''' -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class SingtaoSTNN(BasicNewsRecipe): - title = 'Singtao STNN' - __author__ = 'Larry Chan, larry1chan' - description = 'Chinese News' - oldest_article = 2 - max_articles_per_feed = 100 - simultaneous_downloads = 5 - no_stylesheets = True - use_embedded_content = False - encoding = 'gb2312' - publisher = 'Singtao STNN' - category = 'news, China, world' - language = 'zh' - publication_type = 'newsportal' - extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa - masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif' - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - - remove_tags_before = dict(name='div', attrs={'class': ['page_box']}) - remove_tags_after = dict(name='div', attrs={'class': ['pagelist']}) - - keep_only_tags = [ - dict(name='div', attrs={'class': ['font_title clearfix']}), - dict(name='div', attrs={'id': ['content_zoom']}) - - ] - - remove_attributes = ['width', 'height', 'href'] - - # for a full list of rss check out [url]http://www.stnn.cc/rss/[/url] - - feeds = [(u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'), - (u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'), - (u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'), - (u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'), - (u'International', u'http://www.stnn.cc/rss/guoji/index.xml'), - (u'China', u'http://www.stnn.cc/rss/china/index.xml'), - (u'Opnion', u'http://www.stnn.cc/rss/fin_op/index.xml'), - (u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'), - (u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml') - - ] diff --git a/recipes/strategic_culture.recipe b/recipes/strategic_culture.recipe deleted file mode 100644 index df07a09bef..0000000000 --- a/recipes/strategic_culture.recipe +++ /dev/null @@ -1,88 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2012, Darko Miletic ' - -''' -www.strategic-culture.org -''' - -import time - -from calibre import strftime -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class StrategicCulture(BasicNewsRecipe): - title = 'Strategic Culture Foundation' - __author__ = 'Darko Miletic' - description = 'Online Journal' - publisher = 'Strategic Culture Foundation' - category = 'news, politics' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'en' - publication_type = 'newsportal' - masthead_url = 'http://www.strategic-culture.org/img/logo.jpg' - extra_css = ''' - body{font-family: Arial, sans-serif} - h1{font-family: "Times New Roman",Times,serif} - img{margin-bottom: 0.8em} - ''' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(name=['h1', 'p']), dict(name='div', attrs={'id': 'cke_pastebin'}) - ] - - remove_tags = [dict(name=['object', 'link', 'base', 'meta', 'iframe'])] - - feeds = [ - - (u'News', u'http://www.strategic-culture.org/blocks/news.html'), - (u'Politics', u'http://www.strategic-culture.org/rubrics/politics.html'), - (u'Economics', u'http://www.strategic-culture.org/rubrics/economics.html'), - (u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html'), - (u'Columnists', u'http://www.strategic-culture.org/rubrics/columnists.html') - ] - - def print_version(self, url): - return url.replace('-culture.org/news/', '-culture.org/pview/') - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - if feedurl.endswith('news.html'): - clname = 'sini14' - else: - clname = 'h22' - checker = [] - for item in soup.findAll('a', attrs={'class': clname}): - atag = item - url = atag['href'] - title = self.tag_to_string(atag) - description = '' - daypart = url.rpartition('/')[0] - mpart, sep, day = daypart.rpartition('/') - ypart, sep, month = mpart.rpartition('/') - year = ypart.rpartition('/')[2] - date = strftime("%a, %d %b %Y %H:%M:%S +0000", - time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y")) - if url not in checker: - checker.append(url) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/strategy-business.recipe b/recipes/strategy-business.recipe deleted file mode 100644 index ce89e108ae..0000000000 --- a/recipes/strategy-business.recipe +++ /dev/null @@ -1,104 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class StrategyBusinessRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en' - version = 1 - - title = u'Strategy+Business' - publisher = u' Booz & Company' - category = u'Business' - description = (u'Business magazine for senior business executives and the people who influence them.' - 'Go to http://www.strategy-business.com/registration to sign up for a free account') - - oldest_article = 13 * 7 # 3 months - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - needs_subscription = True - - no_stylesheets = True - remove_javascript = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.strategy-business.com/registration') - for i, f in enumerate(br.forms()): - if 'gatekeeper_edit' in f.name: - br.select_form(name=f.name) - for c in f.controls: - if c.name.endswith('_email'): - br[c.name] = self.username - elif c.name.endswith('_password'): - br[c.name] = self.password - br.submit().read() - break - return br - - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif ;} - a {text-decoration: none; color: blue;} - h1 {margin: 0em; padding: 0em;} - h2 {font-size: medium; font-weight: bold;} - #sb-date {font-size: xx-small; color: #696969} - #category {font-style: italic; font-size: small; color: black; margin: 0em; padding: 0em;} - #byline {font-size: small; color: #666666} - div.profiles {font-size: small; font-style: italic; color: #696969} - div.profiles h2 {font-size: medium; font-style: normal; font-weight: bold; color: black} - ''' - - feeds = [] - feeds.append( - (u'Finance', u'http://feeds.feedburner.com/StrategyBusiness-Finance?format=xml')) - feeds.append((u'Global Perspective', - u'http://feeds.feedburner.com/StrategyBusiness-GlobalPerspective?format=xml')) - feeds.append( - (u'Innovation', u'http://feeds.feedburner.com/StrategyBusiness-Innovation?format=xml')) - feeds.append((u'Marketing And Sales', - u'http://feeds.feedburner.com/StrategyBusiness-MarketingAndSales?format=xml')) - feeds.append((u'Operations And Manufacturing', - u'http://feeds.feedburner.com/StrategyBusiness-OperationsAndManufacturing?format=xml')) - feeds.append((u'Organizations And People', - u'http://feeds.feedburner.com/StrategyBusiness-OrganizationsAndPeople?format=xml')) - feeds.append((u'Strategy And Leadership', - u'http://feeds.feedburner.com/StrategyBusiness-StrategyAndLeadership?format=xml')) - feeds.append((u'Sustainability', - u'http://feeds.feedburner.com/StrategyBusiness-Sustainability?format=xml')) - feeds.append((u'Auto, Airlines And Transport', - u'http://feeds.feedburner.com/StrategyBusiness-AutoAirlinesAndTransport?format=xml')) - feeds.append((u'Consumer Products', - u'http://feeds.feedburner.com/StrategyBusiness-ConsumerProducts?format=xml')) - feeds.append( - (u'Energy', u'http://feeds.feedburner.com/StrategyBusiness-Energy?format=xml')) - feeds.append( - (u'Health Care', u'http://feeds.feedburner.com/StrategyBusiness-HealthCare?format=xml')) - feeds.append( - (u'Technology', u'http://feeds.feedburner.com/StrategyBusiness-Technology?format=xml')) - feeds.append((u'Thought Leaders', - u'http://feeds.feedburner.com/StrategyBusiness-ThoughtLeaders?format=xml')) - feeds.append((u'Business Literature', - u'http://feeds.feedburner.com/StrategyBusiness-BusinessLiterature?format=xml')) - feeds.append((u'Recent Research', - u'http://feeds.feedburner.com/StrategyBusiness-RecentResearch?format=xml')) - - keep_only_tags = [ - dict(name='h1'), - dict(attrs={'class': ['introAndByline', 'content', 'resources']}), - ] - - remove_tags = [] - remove_tags.append(dict(name='img', attrs={'class': 'content1'})) - remove_tags.append( - dict(name='img', attrs={'src': '/media/image/end_of_story.gif'})) - remove_tags.append(dict(name='div', attrs={'class': [ - 'sb-adarea468', 'GigyaShare', 'moreBlogLinks', 'clearboth', 'GigyaCommentsContainer']})) - remove_tags.append(dict(name='div', attrs={'id': 'sb-paging'})) - remove_tags.append(dict(name='div', attrs={'id': 'textsize'})) - remove_tags.append( - dict(name='div', id=lambda x: x and x.startswith('div-gpt-ad-'))) - - def get_article_url(self, article): - url = BasicNewsRecipe.get_article_url(self, article) - return url.partition('?')[0] + '?pg=all' diff --git a/recipes/sueddeutsche_mobil.recipe b/recipes/sueddeutsche_mobil.recipe deleted file mode 100644 index 9516fdce31..0000000000 --- a/recipes/sueddeutsche_mobil.recipe +++ /dev/null @@ -1,133 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2012, 2013 Andreas Zeiser ' -''' -szmobil.sueddeutsche.de/ -''' -# History -# 2015.01.02 Daily Cover from https://zeitung.sueddeutsche.de/webapp by lala-rob (web@lala-rob.de) -# 2014.12.18 Fixing URL set Cover by lala-rob (web@lala-rob.de) -# 2014.10.06 Fixing Login URL and Article URL by lala-rob (web@lala-rob.de) -# -# 2013.01.09 Fixed bugs in article titles containing "strong" and -# other small changes -# 2012.08.04 Initial release - -import datetime -import re - -from calibre import strftime -from calibre.utils.date import utcnow -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class SZmobil(BasicNewsRecipe): - title = u'Süddeutsche Zeitung mobil' - __author__ = u'Andreas Zeiser' - description = u'Nachrichten aus Deutschland. Zugriff auf kostenpflichtiges Abo SZ mobil.' - publisher = u'Sueddeutsche Zeitung' - masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif' - language = u'de' - publication_type = u'newspaper' - category = u'news, politics, Germany' - cover_url = 'https://zeitung.sueddeutsche.de/szdigital/public/issue/previewimage?size=l&issueId=' + \ - (utcnow() + datetime.timedelta(hours=1) - ).strftime("%Y-%m-%d") + '&targetVersion=3&productId=sz' - no_stylesheets = True - oldest_article = 2 - encoding = 'iso-8859-1' - needs_subscription = True - remove_empty_feeds = True - delay = 1 - - # if you want to get rid of the date on the title page use - # timefmt = '' - timefmt = ' [%a, %d %b, %Y]' - - root_url = 'http://epaper.sueddeutsche.de/app/service/epaper-mobil/' - keep_only_tags = [dict(name='div', attrs={'class': 'article'})] - - def get_browser(self): - browser = BasicNewsRecipe.get_browser(self) - - # Login via fetching of Streiflicht -> Fill out login request - url = 'https://id.sueddeutsche.de/login' - browser.open(url) - - browser.select_form(nr=0) # to select the first form - browser['login'] = self.username - browser['password'] = self.password - browser.submit() - return browser - - def parse_index(self): - # find all sections - src = self.index_to_soup( - 'http://epaper.sueddeutsche.de/app/service/epaper-mobil/') - feeds = [] - for itt in src.findAll('a', href=True): - if itt['href'].startswith('section.php?section'): - feeds.append((itt.string[0:-2], itt['href'])) - - all_articles = [] - for feed in feeds: - feed_url = self.root_url + feed[1] - feed_title = feed[0] - - self.report_progress(0, ('Fetching feed') + ' %s...' % - (feed_title if feed_title else feed_url)) - - src = self.index_to_soup(feed_url) - articles = [] - shorttitles = dict() - for itt in src.findAll('a', href=True): - if itt['href'].startswith('article.php?id='): - article_url = itt['href'] - article_id = int( - re.search(r"id=(\d*)&etag=", itt['href']).group(1)) - - # first check if link is a special article in section - # "Meinungsseite" - if itt.find('strong') is not None: - article_name = itt.strong.string - if len(itt.contents) > 1: - shorttitles[article_id] = itt.contents[1] - - articles.append( - (article_name, article_url, article_id)) - continue - - # candidate for a general article - if itt.string is None: - article_name = '' - else: - article_name = itt.string - - if (article_name.find(" mehr") == 0): - # just another link ("mehr") to an article - continue - - if itt.get('id') is not None: - shorttitles[article_id] = article_name - else: - articles.append( - (article_name, article_url, article_id)) - - feed_articles = [] - for article_name, article_url, article_id in articles: - url = self.root_url + article_url - title = article_name - # if you want to get rid of date for each article use - # pubdate = strftime('') - pubdate = strftime('[%a, %d %b]') - description = '' - if shorttitles.get(article_id) is not None: - description = shorttitles[article_id] - # we do not want the flag ("Impressum") - if "HERAUSGEGEBEN VOM" in description: - continue - d = dict(title=title, url=url, date=pubdate, - description=description, content='') - feed_articles.append(d) - all_articles.append((feed_title, feed_articles)) - - return all_articles diff --git a/recipes/sueddeutschezeitung.recipe b/recipes/sueddeutschezeitung.recipe deleted file mode 100644 index 88125650af..0000000000 --- a/recipes/sueddeutschezeitung.recipe +++ /dev/null @@ -1,126 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -www.sueddeutsche.de/sz/ -''' -# History -# 2015.01.02 Daily Cover from https://zeitung.sueddeutsche.de/webapp by lala-rob (web@lala-rob.de) -# 2014.12.15 Set Cover by lala-rob(web@lala-rob.de) -# 2014.10.02 Fixed url Problem by lala-rob(web@lala-rob.de) - -import datetime - -from calibre import strftime -from calibre.utils.date import utcnow -from calibre.web.feeds.news import BasicNewsRecipe - - -class SueddeutcheZeitung(BasicNewsRecipe): - title = u'Süddeutsche Zeitung' - __author__ = 'Darko Miletic' - description = 'News from Germany. Access to paid content.' - publisher = u'Süddeutsche Zeitung' - category = 'news, politics, Germany' - no_stylesheets = True - oldest_article = 2 - encoding = 'iso-8859-1' - needs_subscription = True - remove_empty_feeds = True - delay = 1 - cover_url = 'https://zeitung.sueddeutsche.de/szdigital/public/issue/previewimage?size=l&issueId=' + \ - (utcnow() + datetime.timedelta(hours=1) - ).strftime("%Y-%m-%d") + '&targetVersion=3&productId=sz' - PREFIX = 'http://epaper.sueddeutsche.de' - INDEX = PREFIX + '/app/epaper/textversion/' - use_embedded_content = False - masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif' - language = 'de' - publication_type = 'newspaper' - extra_css = ' body{font-family: Arial,Helvetica,sans-serif} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - remove_attributes = ['height', 'width', 'style'] - - def get_browser(self): - browser = BasicNewsRecipe.get_browser(self) - - # Login via fetching of Streiflicht -> Fill out login request - url = 'https://id.sueddeutsche.de/login' - browser.open(url) - - browser.select_form(nr=0) # to select the first form - browser['login'] = self.username - browser['password'] = self.password - browser.submit() - - return browser - - remove_tags = [ - dict(attrs={'class': 'hidePrint'}), dict( - name=['link', 'object', 'embed', 'base', 'iframe', 'br']) - ] - keep_only_tags = [dict(attrs={'class': 'artikelBox'})] - remove_tags_before = dict(attrs={'class': 'artikelTitel'}) - remove_tags_after = dict(attrs={'class': 'author'}) - - feeds = [ - - (u'Politik', INDEX + 'Politik/'), - (u'Seite drei', INDEX + 'Seite+drei/'), - (u'Thema des Tages', INDEX + 'Thema+des+Tages/'), - (u'Meinungsseite', INDEX + 'Meinungsseite/'), - (u'Wissen', INDEX + 'Wissen/'), - (u'Panorama', INDEX + 'Panorama/'), - (u'Feuilleton', INDEX + 'Feuilleton/'), - (u'Medien', INDEX + 'Medien/'), - (u'Wirtschaft', INDEX + 'Wirtschaft/'), - (u'Sport', INDEX + 'Sport/'), - (u'Bayern', INDEX + 'Bayern/'), - (u'Muenchen', INDEX + 'M%FCnchen/'), - (u'Muenchen City', INDEX + 'M%FCnchen+City/'), - (u'Jetzt.de', INDEX + 'Jetzt.de/'), - (u'Reise', INDEX + 'Reise/'), - (u'SZ Extra', INDEX + 'SZ+Extra/'), - (u'Wochenende', INDEX + 'SZ+am+Wochenende/'), - (u'Stellen-Markt', INDEX + 'Stellen-Markt/'), - (u'Motormarkt', INDEX + 'Motormarkt/'), - (u'Immobilien-Markt', INDEX + 'Immobilien-Markt/'), - (u'Thema', INDEX + 'Thema/'), - (u'Forum', INDEX + 'Forum/'), - (u'Leute', INDEX + 'Leute/'), - (u'Jugend', INDEX + 'Jugend/'), - (u'Beilage', INDEX + 'Beilage/') - ] - - def parse_index(self): - src = self.index_to_soup(self.INDEX) - id = '' - for itt in src.findAll('a', href=True): - if itt['href'].startswith('/app/epaper/textversion/inhalt/'): - id = itt['href'].rpartition('/inhalt/')[2] - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, ('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl + id) - tbl = soup.find(attrs={'class': 'szprintd'}) - for item in tbl.findAll(name='td', attrs={'class': 'topthema'}): - atag = item.find(attrs={'class': 'Titel'}).a - ptag = item.find('p') - stag = ptag.find('script') - if stag: - stag.extract() - url = self.PREFIX + atag['href'] - title = self.tag_to_string(atag) - description = self.tag_to_string(ptag) - articles.append({ - 'title': title, 'date': strftime(self.timefmt), 'url': url, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/superbebe.recipe b/recipes/superbebe.recipe deleted file mode 100644 index 491cad033a..0000000000 --- a/recipes/superbebe.recipe +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -superbebe.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Superbebe(BasicNewsRecipe): - title = u'Superbebe' - __author__ = u'Silviu Cotoar\u0103' - description = 'Superbebe' - publisher = 'Superbebe' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Bebe,Mamici' - encoding = 'utf-8' - cover_url = 'http://www.superbebe.ro/images/superbebe.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'articol'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['info']}), dict( - name='div', attrs={'class': ['tags']}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': ['tags']}) - ] - - feeds = [ - (u'Feeds', u'http://www.superbebe.ro/rss') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/superesportes.recipe b/recipes/superesportes.recipe deleted file mode 100644 index d2f24d650a..0000000000 --- a/recipes/superesportes.recipe +++ /dev/null @@ -1,76 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Luciano Furtado ' -''' -www.superesportes.com.br -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SuperEsportesRecipe(BasicNewsRecipe): - - title = u'www.superesportes.com.br' - description = u'Superesportes - Not√≠cias do esporte no Brasil e no mundo' - __author__ = 'Luciano Furtado' - language = 'pt' - category = 'esportes, Brasil' - no_stylesheets = True - oldest_article = 7 - - use_embedded_content = 0 - max_articles_per_feed = 10 - cover_url = 'http://imgs.mg.superesportes.com.br/superesportes_logo.png' - - extra_css = 'div.info_noticias h1 { font-size: 100% }' - - remove_tags = [ - dict(name='div', attrs={'class': 'topo'}), - dict(name='div', attrs={'class': 'rodape'}), - dict(name='div', attrs={'class': 'navegacao'}), - dict(name='div', attrs={'class': 'lateral2'}), - dict(name='div', attrs={'class': 'leia_mais'}), - dict(name='div', attrs={'id': 'comentar'}), - dict(name='div', attrs={'id': 'vrumelc_noticia'}), - dict(name='div', attrs={'class': 'compartilhe'}), - dict(name='div', attrs={'class': 'linha_noticias'}), - dict(name='div', attrs={'class': 'botoes_noticias'}), - dict(name='div', attrs={'class': 'barra_time bg_time'}), - ] - - def parse_index(self): - feeds = [] - sections = [ - (u'Atletico', 'http://www.df.superesportes.com.br/futebol/atletico-mg/capa_atletico_mg/index.shtml'), - (u'Botafogo', 'http://www.df.superesportes.com.br/futebol/botafogo/capa_botafogo/index.shtml'), - (u'Corinthinas', 'http://www.df.superesportes.com.br/futebol/corinthians/capa_corinthians/index.shtml'), - (u'Cruzeiro', 'http://www.df.superesportes.com.br/futebol/cruzeiro/capa_cruzeiro/index.shtml'), - (u'Flamengo', 'http://www.df.superesportes.com.br/futebol/flamengo/capa_flamengo/index.shtml'), - (u'Fluminense', 'http://www.df.superesportes.com.br/futebol/fluminense/capa_fluminense/index.shtml'), - (u'Palmeiras', 'http://www.df.superesportes.com.br/futebol/palmeiras/capa_palmeiras/index.shtml'), - (u'Santos', 'http://www.df.superesportes.com.br/futebol/santos/capa_santos/index.shtml'), - (u'S√£o Paulo', 'http://www.df.superesportes.com.br/futebol/sao-paulo/capa_sao_paulo/index.shtml'), - (u'Vasco', 'http://www.df.superesportes.com.br/futebol/vasco/capa_vasco/index.shtml'), - ] - - for section, url in sections: - current_articles = [] - - soup = self.index_to_soup(url) - latestNews = soup.find( - name='ul', attrs={'class': 'lista_ultimas_noticias'}) - - for li_tag in latestNews.findAll(name='li'): - a_tag = li_tag.find('a', href=True) - if a_tag is None: - continue - title = self.tag_to_string(a_tag) - url = a_tag.get('href', False) - self.log("\n\nFound title: " + title + - "\nUrl: " + url + "\nSection: " + section) - current_articles.append( - {'title': title, 'url': url, 'description': title, 'date': ''}) - - if current_articles: - feeds.append((section, current_articles)) - - return feeds diff --git a/recipes/syddjurslokalavisen_dk.recipe b/recipes/syddjurslokalavisen_dk.recipe deleted file mode 100644 index eba72320fa..0000000000 --- a/recipes/syddjurslokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Adresseavisen Syddjurs -''' - - -class SyddjursLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Adresseavisen Syddjurs' - description = 'Lokale og regionale nyheder, sport og kultur fra det sydlige Djursland på syddjurs.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Adresseavisen Syddjurs', 'http://syddjurs.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Adresseavisen Syddjurs', 'http://syddjurs.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Adresseavisen Syddjurs', 'http://syddjurs.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Adresseavisen Syddjurs', 'http://syddjurs.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Adresseavisen Syddjurs', 'http://syddjurs.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Adresseavisen Syddjurs', 'http://syddjurs.lokalavisen.dk/section/senestelaeserbreverss'), - - ] -