diff --git a/recipes/icons/sonar21.png b/recipes/icons/sonar21.png new file mode 100644 index 0000000000..cf89748e32 Binary files /dev/null and b/recipes/icons/sonar21.png differ diff --git a/recipes/icons/taipei.png b/recipes/icons/taipei.png deleted file mode 100644 index 8bdca073c8..0000000000 Binary files a/recipes/icons/taipei.png and /dev/null differ diff --git a/recipes/icons/tanea.png b/recipes/icons/tanea.png deleted file mode 100644 index fda49b5148..0000000000 Binary files a/recipes/icons/tanea.png and /dev/null differ diff --git a/recipes/icons/tanuki.png b/recipes/icons/tanuki.png deleted file mode 100644 index bcbffede20..0000000000 Binary files a/recipes/icons/tanuki.png and /dev/null differ diff --git a/recipes/icons/taz.png b/recipes/icons/taz.png deleted file mode 100644 index 30e4b25703..0000000000 Binary files a/recipes/icons/taz.png and /dev/null differ diff --git a/recipes/icons/tech_economy.png b/recipes/icons/tech_economy.png deleted file mode 100644 index b92f4cfb4a..0000000000 Binary files a/recipes/icons/tech_economy.png and /dev/null differ diff --git a/recipes/icons/tech_world.png b/recipes/icons/tech_world.png deleted file mode 100644 index 48c7cb5fed..0000000000 Binary files a/recipes/icons/tech_world.png and /dev/null differ diff --git a/recipes/icons/technology_review.png b/recipes/icons/technology_review.png deleted file mode 100644 index 46efe644f8..0000000000 Binary files a/recipes/icons/technology_review.png and /dev/null differ diff --git a/recipes/icons/technology_review_de.png b/recipes/icons/technology_review_de.png deleted file mode 100644 index 163b186c46..0000000000 Binary files a/recipes/icons/technology_review_de.png and /dev/null differ diff --git a/recipes/icons/techtarget.png b/recipes/icons/techtarget.png deleted file mode 100644 index 4f00895e88..0000000000 Binary files a/recipes/icons/techtarget.png and /dev/null differ diff --git a/recipes/icons/tedneward.png b/recipes/icons/tedneward.png deleted file mode 100644 index 6ce465923e..0000000000 Binary files a/recipes/icons/tedneward.png and /dev/null differ diff --git a/recipes/icons/telam.png b/recipes/icons/telam.png deleted file mode 100644 index e99a01bbfb..0000000000 Binary files a/recipes/icons/telam.png and /dev/null differ diff --git a/recipes/icons/telegraph_in.png b/recipes/icons/telegraph_in.png deleted file mode 100644 index 9fc707204e..0000000000 Binary files a/recipes/icons/telegraph_in.png and /dev/null differ diff --git a/recipes/icons/telegraph_uk.png b/recipes/icons/telegraph_uk.png deleted file mode 100644 index 2ec43e87b7..0000000000 Binary files a/recipes/icons/telegraph_uk.png and /dev/null differ diff --git a/recipes/icons/telepolis_artikel.png b/recipes/icons/telepolis_artikel.png deleted file mode 100644 index 10d7f71665..0000000000 Binary files a/recipes/icons/telepolis_artikel.png and /dev/null differ diff --git a/recipes/icons/thai_post_daily.png b/recipes/icons/thai_post_daily.png deleted file mode 100644 index cc807f67a2..0000000000 Binary files a/recipes/icons/thai_post_daily.png and /dev/null differ diff --git a/recipes/icons/the_daily_news_egypt.png b/recipes/icons/the_daily_news_egypt.png deleted file mode 100644 index b061b2b4c5..0000000000 Binary files a/recipes/icons/the_daily_news_egypt.png and /dev/null differ diff --git a/recipes/icons/the_escapist.png b/recipes/icons/the_escapist.png deleted file mode 100644 index 758ee536a8..0000000000 Binary files a/recipes/icons/the_escapist.png and /dev/null differ diff --git a/recipes/icons/the_feature.png b/recipes/icons/the_feature.png deleted file mode 100644 index f5c4f717a1..0000000000 Binary files a/recipes/icons/the_feature.png and /dev/null differ diff --git a/recipes/icons/the_freeman.png b/recipes/icons/the_freeman.png deleted file mode 100644 index e30f4c41d4..0000000000 Binary files a/recipes/icons/the_freeman.png and /dev/null differ diff --git a/recipes/icons/the_manila_bulletin.png b/recipes/icons/the_manila_bulletin.png deleted file mode 100644 index 0558362870..0000000000 Binary files a/recipes/icons/the_manila_bulletin.png and /dev/null differ diff --git a/recipes/icons/the_manila_times.png b/recipes/icons/the_manila_times.png deleted file mode 100644 index 6e52d7a7bc..0000000000 Binary files a/recipes/icons/the_manila_times.png and /dev/null differ diff --git a/recipes/icons/the_new_age_za.png b/recipes/icons/the_new_age_za.png deleted file mode 100644 index 911968a757..0000000000 Binary files a/recipes/icons/the_new_age_za.png and /dev/null differ diff --git a/recipes/icons/the_new_republic.png b/recipes/icons/the_new_republic.png deleted file mode 100644 index dc756592a1..0000000000 Binary files a/recipes/icons/the_new_republic.png and /dev/null differ diff --git a/recipes/icons/the_philippine_star.png b/recipes/icons/the_philippine_star.png deleted file mode 100644 index e30f4c41d4..0000000000 Binary files a/recipes/icons/the_philippine_star.png and /dev/null differ diff --git a/recipes/icons/the_scotsman.png b/recipes/icons/the_scotsman.png deleted file mode 100644 index 19efd5ba99..0000000000 Binary files a/recipes/icons/the_scotsman.png and /dev/null differ diff --git a/recipes/icons/the_workingham_times.png b/recipes/icons/the_workingham_times.png deleted file mode 100644 index ac6f3b3cc8..0000000000 Binary files a/recipes/icons/the_workingham_times.png and /dev/null differ diff --git a/recipes/icons/thecodelesscode.png b/recipes/icons/thecodelesscode.png deleted file mode 100644 index 6e40d1181f..0000000000 Binary files a/recipes/icons/thecodelesscode.png and /dev/null differ diff --git a/recipes/icons/thedgesingapore.png b/recipes/icons/thedgesingapore.png deleted file mode 100644 index cf4d2e7df1..0000000000 Binary files a/recipes/icons/thedgesingapore.png and /dev/null differ diff --git a/recipes/icons/theluminouslandscape.png b/recipes/icons/theluminouslandscape.png deleted file mode 100644 index a432e96a5d..0000000000 Binary files a/recipes/icons/theluminouslandscape.png and /dev/null differ diff --git a/recipes/icons/themarketticker.png b/recipes/icons/themarketticker.png deleted file mode 100644 index 1b95113e92..0000000000 Binary files a/recipes/icons/themarketticker.png and /dev/null differ diff --git a/recipes/icons/themorningpaper.png b/recipes/icons/themorningpaper.png deleted file mode 100644 index 3103c10c9d..0000000000 Binary files a/recipes/icons/themorningpaper.png and /dev/null differ diff --git a/recipes/icons/thenews.png b/recipes/icons/thenews.png deleted file mode 100644 index 8fbd7f2145..0000000000 Binary files a/recipes/icons/thenews.png and /dev/null differ diff --git a/recipes/icons/theoldfoodie.png b/recipes/icons/theoldfoodie.png deleted file mode 100644 index 99426c2d4e..0000000000 Binary files a/recipes/icons/theoldfoodie.png and /dev/null differ diff --git a/recipes/icons/theonion.png b/recipes/icons/theonion.png deleted file mode 100644 index c0941b8d83..0000000000 Binary files a/recipes/icons/theonion.png and /dev/null differ diff --git a/recipes/icons/thewest_au.png b/recipes/icons/thewest_au.png deleted file mode 100644 index e66164eb97..0000000000 Binary files a/recipes/icons/thewest_au.png and /dev/null differ diff --git a/recipes/icons/think_progress.png b/recipes/icons/think_progress.png deleted file mode 100644 index 33f0b5def1..0000000000 Binary files a/recipes/icons/think_progress.png and /dev/null differ diff --git a/recipes/icons/thn.png b/recipes/icons/thn.png deleted file mode 100644 index 5fb28fed0d..0000000000 Binary files a/recipes/icons/thn.png and /dev/null differ diff --git a/recipes/icons/tidbits.png b/recipes/icons/tidbits.png deleted file mode 100644 index c83e7c4ceb..0000000000 Binary files a/recipes/icons/tidbits.png and /dev/null differ diff --git a/recipes/icons/tijolaco.png b/recipes/icons/tijolaco.png deleted file mode 100644 index 616114c008..0000000000 Binary files a/recipes/icons/tijolaco.png and /dev/null differ diff --git a/recipes/icons/time_turk.png b/recipes/icons/time_turk.png deleted file mode 100644 index c9687a42b4..0000000000 Binary files a/recipes/icons/time_turk.png and /dev/null differ diff --git a/recipes/icons/timesnewroman.png b/recipes/icons/timesnewroman.png deleted file mode 100644 index 39bd309d67..0000000000 Binary files a/recipes/icons/timesnewroman.png and /dev/null differ diff --git a/recipes/icons/tnxm.png b/recipes/icons/tnxm.png deleted file mode 100644 index 527d9e55df..0000000000 Binary files a/recipes/icons/tnxm.png and /dev/null differ diff --git a/recipes/icons/today_online.png b/recipes/icons/today_online.png deleted file mode 100644 index 165241c5d6..0000000000 Binary files a/recipes/icons/today_online.png and /dev/null differ diff --git a/recipes/icons/tomshardware.png b/recipes/icons/tomshardware.png deleted file mode 100644 index 8759ffb474..0000000000 Binary files a/recipes/icons/tomshardware.png and /dev/null differ diff --git a/recipes/icons/tomshardware_de.png b/recipes/icons/tomshardware_de.png deleted file mode 100644 index 8759ffb474..0000000000 Binary files a/recipes/icons/tomshardware_de.png and /dev/null differ diff --git a/recipes/icons/tomshardware_it.png b/recipes/icons/tomshardware_it.png deleted file mode 100644 index ad467b86d3..0000000000 Binary files a/recipes/icons/tomshardware_it.png and /dev/null differ diff --git a/recipes/icons/toronto_sun.png b/recipes/icons/toronto_sun.png deleted file mode 100644 index e9c2e90e77..0000000000 Binary files a/recipes/icons/toronto_sun.png and /dev/null differ diff --git a/recipes/icons/toyokeizai.png b/recipes/icons/toyokeizai.png deleted file mode 100644 index 1caad7a3ed..0000000000 Binary files a/recipes/icons/toyokeizai.png and /dev/null differ diff --git a/recipes/icons/tpm_uk.png b/recipes/icons/tpm_uk.png deleted file mode 100644 index 58bf5c2d63..0000000000 Binary files a/recipes/icons/tpm_uk.png and /dev/null differ diff --git a/recipes/icons/tri_city_herald.png b/recipes/icons/tri_city_herald.png deleted file mode 100644 index 711911cc67..0000000000 Binary files a/recipes/icons/tri_city_herald.png and /dev/null differ diff --git a/recipes/icons/trojmiasto_pl.png b/recipes/icons/trojmiasto_pl.png deleted file mode 100644 index 0c83d3416c..0000000000 Binary files a/recipes/icons/trojmiasto_pl.png and /dev/null differ diff --git a/recipes/icons/trombon.png b/recipes/icons/trombon.png deleted file mode 100644 index a4531dd419..0000000000 Binary files a/recipes/icons/trombon.png and /dev/null differ diff --git a/recipes/icons/tsn.png b/recipes/icons/tsn.png deleted file mode 100644 index 579936675f..0000000000 Binary files a/recipes/icons/tsn.png and /dev/null differ diff --git a/recipes/icons/tuttojove.png b/recipes/icons/tuttojove.png deleted file mode 100644 index fa95114bd2..0000000000 Binary files a/recipes/icons/tuttojove.png and /dev/null differ diff --git a/recipes/icons/tuttosport.png b/recipes/icons/tuttosport.png deleted file mode 100644 index a4e71a3f74..0000000000 Binary files a/recipes/icons/tuttosport.png and /dev/null differ diff --git a/recipes/icons/tveast_dk.png b/recipes/icons/tveast_dk.png deleted file mode 100644 index ffd2a8dfce..0000000000 Binary files a/recipes/icons/tveast_dk.png and /dev/null differ diff --git a/recipes/icons/tvn24.png b/recipes/icons/tvn24.png deleted file mode 100644 index 497af2ea7c..0000000000 Binary files a/recipes/icons/tvn24.png and /dev/null differ diff --git a/recipes/icons/tvxs.png b/recipes/icons/tvxs.png deleted file mode 100644 index 6512af75b5..0000000000 Binary files a/recipes/icons/tvxs.png and /dev/null differ diff --git a/recipes/icons/tweakers.png b/recipes/icons/tweakers.png deleted file mode 100644 index 0915f47883..0000000000 Binary files a/recipes/icons/tweakers.png and /dev/null differ diff --git a/recipes/icons/twitchfilms.png b/recipes/icons/twitchfilms.png deleted file mode 100644 index 4aa0002b09..0000000000 Binary files a/recipes/icons/twitchfilms.png and /dev/null differ diff --git a/recipes/icons/twtfb.png b/recipes/icons/twtfb.png deleted file mode 100644 index 1703cf416a..0000000000 Binary files a/recipes/icons/twtfb.png and /dev/null differ diff --git a/recipes/icons/ubuntu_pl.png b/recipes/icons/ubuntu_pl.png deleted file mode 100644 index 9fb99753ef..0000000000 Binary files a/recipes/icons/ubuntu_pl.png and /dev/null differ diff --git a/recipes/icons/ukraiyns_tizhdien.png b/recipes/icons/ukraiyns_tizhdien.png deleted file mode 100644 index 2c6d8118a6..0000000000 Binary files a/recipes/icons/ukraiyns_tizhdien.png and /dev/null differ diff --git a/recipes/icons/unz.png b/recipes/icons/unz.png new file mode 100644 index 0000000000..014115d41c Binary files /dev/null and b/recipes/icons/unz.png differ diff --git a/recipes/sonar21.recipe b/recipes/sonar21.recipe new file mode 100644 index 0000000000..3750b1a065 --- /dev/null +++ b/recipes/sonar21.recipe @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from calibre.web.feeds.news import BasicNewsRecipe, classes + + +class Sonar21(BasicNewsRecipe): + title = 'Sonar21' + __author__ = 'unkn0wn' + oldest_article = 7 + language = 'en_US' + max_articles_per_feed = 100 + use_embedded_content = False + masthead_url = 'https://sonar21.com/wp-content/uploads/2024/10/logo_999999_720x216.png' + cover_url = 'https://sonar21.com/wp-content/uploads/2024/09/sonar21_backplate_vertical.jpg' + encoding = 'utf-8' + browser_type = 'webengine' + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + extra_css = '.entry-meta, .wp-element-caption, .wp-block-image { font-size: small; }' + + keep_only_tags = [classes('entry-header entry-content')] + + remove_tags = [ + dict(name=['iframe', 'svg']), + classes('addtoany_share_save_container wpd-avatar'), + ] + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article), + }, + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + + feeds = ['https://sonar21.com/feed'] diff --git a/recipes/substack.recipe b/recipes/substack.recipe index 75ea2f8c57..691871145c 100644 --- a/recipes/substack.recipe +++ b/recipes/substack.recipe @@ -6,6 +6,7 @@ # Copyright: Nathan Cook (nathan.cook@gmail.com) ## # Written: 2020-12-18 +# Updated: 2024-11-04 ## __license__ = 'GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html' @@ -14,6 +15,7 @@ __version__ = 'v0.1.1' __date__ = '2020-12-19' __author__ = 'topynate' +import re import json from calibre.web.feeds.news import BasicNewsRecipe @@ -21,21 +23,36 @@ from mechanize import Request class Substack(BasicNewsRecipe): - title = 'Substack' - __author__ = 'topynate' + title = 'Substack' + __author__ = 'topynate, unkn0wn' + description = 'Use advanced menu if you want to add your own substack handles.' oldest_article = 7 language = 'en' max_articles_per_feed = 100 - auto_cleanup = True + auto_cleanup = True + auto_cleanup_keep = '//*[@class="subtitle"]' needs_subscription = 'optional' use_embedded_content = False + masthead_url = 'https://substack.com/img/substack_wordmark.png' + cover_url = 'https://substack.com/img/substack.png' + extra_css = '.captioned-image-container, .image-container {font-size: small;}' recipe_specific_options = { + 'auths': { + 'short': 'enter the @handles you subscribe to:\nseperated by a space', + 'long': 'julianmacfarlane ianleslie .... ....', + 'default': 'julianmacfarlane ianleslie thesalvo', + }, 'days': { 'short': 'Oldest article to download from this news source. In days ', 'long': 'For example, 0.5, gives you articles from the past 12 hours', - 'default': str(oldest_article) - } + 'default': str(oldest_article), + }, + 'res': { + 'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500', + 'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.', + 'default': '600', + }, } def __init__(self, *args, **kwargs): @@ -44,12 +61,12 @@ class Substack(BasicNewsRecipe): if d and isinstance(d, str): self.oldest_article = float(d) -# Every Substack publication has an RSS feed at https://{name}.substack.com/feed. -# The same URL provides either all posts, or all free posts + previews of paid posts, -# depending on whether you're logged in. - feeds = [ - ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example - ] + # Every Substack publication has an RSS feed at https://{name}.substack.com/feed. + # The same URL provides either all posts, or all free posts + previews of paid posts, + # depending on whether you're logged in. + # feeds = [ + # ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example + # ] def get_browser(self): br = BasicNewsRecipe.get_browser(self) @@ -70,3 +87,24 @@ class Substack(BasicNewsRecipe): if res.getcode() != 200: raise ValueError('Login failed, check username and password') return br + + def get_feeds(self): + ans = [] + u = self.recipe_specific_options.get('auths') + if u and isinstance(u, str): + for x in u.split(): + ans.append('https://' + x.replace('@', ' ') + '.substack.com/feed') + return ans + + def preprocess_html(self, soup): + res = '600' + w = self.recipe_specific_options.get('res') + if w and isinstance(w, str): + res = w + for img in soup.findAll('img', attrs={'src': True}): + img['src'] = re.sub(r'w_\d+', 'w_' + res, img['src']) + for src in soup.findAll(['source', 'svg']): + src.extract() + for but in soup.findAll(attrs={'class': ['button-wrapper']}): + but.extract() + return soup diff --git a/recipes/taipei.recipe b/recipes/taipei.recipe deleted file mode 100644 index a753bf526f..0000000000 --- a/recipes/taipei.recipe +++ /dev/null @@ -1,69 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict( - attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)} - ) - - -class TN(BasicNewsRecipe): - title = u'Taipei Times' - language = 'en_TW' - __author__ = 'Krittika Goyal' - oldest_article = 1 # days - max_articles_per_feed = 25 - use_embedded_content = False - - no_stylesheets = True - - keep_only_tags = [ - dict(name='h1'), - dict(name='h3', attrs={'class': 'a'}), - classes('main_ipic reporter text page'), - ] - - feeds = [ - ('Front Page', 'http://www.taipeitimes.com/xml/front.rss'), - ('Editorials', 'http://www.taipeitimes.com/xml/editorials.rss'), - ('Taiwan', 'http://www.taipeitimes.com/xml/taiwan.rss'), - ('Features', 'http://www.taipeitimes.com/xml/feat.rss'), - ('Business', 'http://www.taipeitimes.com/xml/biz.rss'), - ('World', 'http://www.taipeitimes.com/xml/world.rss'), - ('Sports', 'http://www.taipeitimes.com/xml/sport.rss'), - ] - - def preprocess_html(self, soup, *a): - for div in soup.findAll(**classes('page')): - for a in div.findAll('a', href=True): - a['data-calibre-follow-link'] = '1' - if a['href'].startswith('/'): - a['href'] = 'http://www.taipeitimes.com' + a['href'] - return soup - - recursions = 1 - - def is_link_wanted(self, url, tag): - digit = re.search(r'/(\d+)$', url) - if digit is not None and tag['data-calibre-follow-link'] == '1' and re.match(r'\d+', self.tag_to_string(tag)) is not None: - if int(digit.group(1)) > 1: - return True - return False - - def postprocess_html(self, soup, *a): - for div in soup.findAll(**classes('page')): - div.extract() - return soup - - # def parse_index(self): - # return [( - # 'Articles', [{ - # 'title': - # 'test', - # 'url': - # 'http://www.taipeitimes.com/News/editorials/archives/2019/02/26/2003710411' - # }] - # )] diff --git a/recipes/tanea.recipe b/recipes/tanea.recipe deleted file mode 100644 index 274f8f867f..0000000000 --- a/recipes/tanea.recipe +++ /dev/null @@ -1,32 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class TaNea(BasicNewsRecipe): - title = u'Ta Nea' - __author__ = 'Pan' - oldest_article = 1 - max_articles_per_feed = 100 - no_stylesheets = True - language = 'el' - - remove_tags_before = dict(name='div', attrs={'id': 'print-body'}) - remove_tags_after = dict(name='div', attrs={'id': 'text'}) - - feeds = [ - (u'\xce\x95\xce\xbb\xce\xbb\xce\xac\xce\xb4\xce\xb1', - u'http://www.tanea.gr/default.asp?pid=66&la=1'), - (u'\xce\x9a\xcf\x8c\xcf\x83\xce\xbc\xce\xbf\xcf\x82', - u'http://www.tanea.gr/default.asp?pid=67&la=1'), - (u'\xce\x9f\xce\xb9\xce\xba\xce\xbf\xce\xbd\xce\xbf\xce\xbc\xce\xaf\xce\xb1', - u'http://www.tanea.gr/default.asp?pid=68&la=1'), - (u'\xce\xa0\xce\xbf\xce\xbb\xce\xb9\xcf\x84\xce\xb9\xcf\x83\xce\xbc\xcf\x8c\xcf\x82', - u'http://www.tanea.gr/default.asp?pid=69&la=1'), - (u'\xce\x93\xce\xbd\xcf\x8e\xce\xbc\xce\xb5\xcf\x82', - u'http://www.tanea.gr/default.asp?pid=79&la=1'), - (u'\xce\xa1\xce\xb9\xcf\x80\xce\xad\xcf\x82', - u'http://www.tanea.gr/default.asp?pid=80&la=1'), - (u'\xce\x91\xce\xb9\xcf\x87\xce\xbc\xce\xad\xcf\x82', - u'http://www.tanea.gr/default.asp?pid=81&la=1')] - - def print_version(self, url): - return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96') diff --git a/recipes/tanuki.recipe b/recipes/tanuki.recipe deleted file mode 100644 index b02527304b..0000000000 --- a/recipes/tanuki.recipe +++ /dev/null @@ -1,46 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class tanuki(BasicNewsRecipe): - title = u'Tanuki' - oldest_article = 7 - __author__ = 'fenuks' - description = u'Tanuki - portal o anime i mandze.' - category = 'anime, manga' - language = 'pl' - max_articles_per_feed = 100 - encoding = 'utf-8' - autocleanup = True - extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}' - preprocess_regexps = [(re.compile(u'

', re.DOTALL), lambda match: ''), (re.compile( - type(u'')(r'
Zobacz jak ocenili
'), re.DOTALL), lambda match: '')] - remove_empty_feeds = True - no_stylesheets = True - keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})] # noqa - remove_tags = [dict(name='div', attrs={'class': 'screen'}), dict(id='randomtoplist'), dict(attrs={'class': 'note'})] - feeds = [ - (u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'), - (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'), - (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'), - - (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'), - (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml')] - - def append_page(self, soup, appendtag): - nexturl = appendtag.find(attrs={'class': 'nextarrow'}) - if nexturl: - while nexturl: - soup2 = self.index_to_soup( - 'http://czytelnia.tanuki.pl' + nexturl['href']) - nexturl = soup2.find(attrs={'class': 'nextarrow'}) - pagetext = soup2.find( - attrs={'class': ['chaptername', 'copycat']}) - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class': 'copycat'}) - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'class': 'nextarrow'}): - r.extract() diff --git a/recipes/taz.recipe b/recipes/taz.recipe deleted file mode 100644 index e571fade50..0000000000 --- a/recipes/taz.recipe +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2010, Lars Jacob jacob.lars at gmail.com' -__docformat__ = 'restructuredtext de' - -''' -www.taz.de/digiabo -''' -import os -import zipfile - -from calibre.ptempfile import PersistentTemporaryFile -from calibre.web.feeds.news import BasicNewsRecipe - -try: - from urllib.error import HTTPError - from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen -except ImportError: - from urllib2 import HTTPBasicAuthHandler, HTTPError, build_opener, install_opener, urlopen - - -class TazDigiabo(BasicNewsRecipe): - - title = u'Taz Digiabo' - description = u'Das EPUB DigiAbo der Taz' - language = 'de' - lang = 'de-DE' - - __author__ = 'Lars Jacob' - needs_subscription = True - - conversion_options = { - 'no_default_epub_cover': True - } - - def build_index(self): - domain = "http://dl.taz.de" - - url = domain + "/epub/" - - auth_handler = HTTPBasicAuthHandler() - auth_handler.add_password(realm='TAZ-ABO', - uri=url, - user=self.username, - passwd=self.password) - opener = build_opener(auth_handler) - install_opener(opener) - - try: - f = urlopen(url) - except HTTPError: - self.report_progress(0, _('Can\'t login to download issue')) - raise ValueError('Failed to login, check your username and' - ' password') - - tmp = PersistentTemporaryFile(suffix='.epub') - self.report_progress(0, _('downloading epub')) - tmp.write(f.read()) - tmp.close() - - zfile = zipfile.ZipFile(tmp.name, 'r') - self.report_progress(0, _('extracting epub')) - - zfile.extractall(self.output_dir) - - tmp.close() - index = os.path.join(self.output_dir, 'content.opf') - - self.report_progress(1, _('epub downloaded and extracted')) - - return index diff --git a/recipes/tech_economy.recipe b/recipes/tech_economy.recipe deleted file mode 100644 index 53d8a56f83..0000000000 --- a/recipes/tech_economy.recipe +++ /dev/null @@ -1,15 +0,0 @@ -__license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1327051385(BasicNewsRecipe): - title = u'Tech Economy' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - masthead_url = 'http://www.techeconomy.it/wp-content/uploads/2012/01/Logo-TE9.png' - feeds = [(u'Tech Economy', u'http://www.techeconomy.it/feed/')] - remove_tags_after = [dict(name='div', attrs={'class': 'cab-author-name'})] - __author__ = 'faber1971' - description = 'Italian website on technology - v1.00 (28, January 2012)' - language = 'it' diff --git a/recipes/tech_world.recipe b/recipes/tech_world.recipe deleted file mode 100644 index 2be557ab8e..0000000000 --- a/recipes/tech_world.recipe +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini' -__copyright__ = '2009, Lorenzo Vigentini ' -__version__ = 'v1.01' -__date__ = '14, January 2010' - -''' -http://www.techworld.com/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class techworld(BasicNewsRecipe): - __author__ = 'Lorenzo Vigentini' - description = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK' # noqa - - title = 'TechWorld' - publisher = 'IDG Communication' - category = ('Apple, Mac, video, computing, product reviews, ' - 'editing, cameras, production') - - language = 'en' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 7 - max_articles_per_feed = 15 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - auto_cleanup = True - - feeds = [ - (u'News', u'http://www.techworld.com/news/rss'), - (u'Tutorial', u'http://www.techworld.com/tutorial/rss'), - (u'Reviews', u'http://www.techworld.com/review/rss'), - (u'Features', u'http://www.techworld.com/features/rss'), - (u'Analysis', u'http://www.techworld.com/analysis/rss'), - (u'Galleries', - u'http://www.techworld.com/picture-gallery/rss'), - (u'TechWorld Blogs', - u'http://www.techworld.com/blog/rss'), - ] diff --git a/recipes/technology_review.recipe b/recipes/technology_review.recipe deleted file mode 100644 index f475f9ac4c..0000000000 --- a/recipes/technology_review.recipe +++ /dev/null @@ -1,31 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class TechnologyReview(BasicNewsRecipe): - title = u'Technology Review' - __author__ = 'rty' - description = 'MIT Technology Magazine (from RSS feeds)' - publisher = 'Technology Review Inc.' - category = 'Technology, Innovation, R&D' - language = 'en' - oldest_article = 14 - max_articles_per_feed = 100 - No_stylesheets = True - auto_cleanup = True - extra_css = """ - .ArticleBody {font: normal; text-align: justify} - .headline {font: bold x-large} - .subheadline {font: italic large} - """ - feeds = [ - (u'Computing', - u'http://feeds.technologyreview.com/technology_review_Computing'), - (u'Energy', - u'http://feeds.technologyreview.com/technology_review_Energy'), - (u'Materials', - u'http://feeds.technologyreview.com/technology_review_Materials'), - (u'Biomedicine', - u'http://feeds.technologyreview.com/technology_review_Biotech'), - (u'Business', - u'http://feeds.technologyreview.com/technology_review_Biztech') - ] diff --git a/recipes/technology_review_de.recipe b/recipes/technology_review_de.recipe deleted file mode 100644 index 3caf9bc54f..0000000000 --- a/recipes/technology_review_de.recipe +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -__license__ = 'GPL v3' -__copyright__ = '2010, Anton Gillert ' - -''' -Technology Review (deutsch) - heise.de/tr -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TechnologyReviewDe(BasicNewsRecipe): - title = 'Technology Review' - __author__ = 'Anton Gillert, schuster' - description = 'Technology news from Germany' - language = 'de' - - oldest_article = 14 - max_articles_per_feed = 50 - use_embedded_content = False - no_stylesheets = True - remove_javascript = True - - masthead_url = 'http://1.f.ix.de/imgs/02/3/0/8/5/2/8/tr_logo-544bd18881c81263.png' - - feeds = [ - ('News', 'http://www.heise.de/tr/rss/news-atom.xml'), - ('Blog', 'http://www.heise.de/tr/rss/blog-atom.xml') - ] - - keep_only_tags = [ - dict(name='article') - ] - - remove_tags = [ - dict(name='nav'), - dict(name='figure', attrs={'class': 'logo'}), - dict(name='hr') - ] - - extra_css = '.bild_zentriert {font-size: 0.6em} \ - .source {font-size: 0.6em}' - - def get_cover_url(self): - self.cover_url = '' - soup = self.index_to_soup('http://www.heise.de/tr/magazin/') - img = soup.find('img', alt=re.compile( - 'Titelbild Technology Review'), src=True) - if img: - self.cover_url = 'http://www.heise.de' + img['src'] - return self.cover_url - - def print_version(self, url): - return url + '?view=print' - - def preprocess_html(self, soup): - # remove style attributes - for item in soup.findAll(attrs={'style': True}): - del item['style'] - # remove reference to article source - for p in soup.findAll('p'): - if 'URL dieses Artikels:' in self.tag_to_string(p): - p.extract() - return soup diff --git a/recipes/techtarget.recipe b/recipes/techtarget.recipe deleted file mode 100644 index e60f7b3f69..0000000000 --- a/recipes/techtarget.recipe +++ /dev/null @@ -1,64 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class TechTarget(BasicNewsRecipe): - title = u'Techtarget' - __author__ = 'Julio:map' - description = '''IT Infrastructure related blogs - from Techtarget''' - publisher = 'Techtarget' - category = 'IT, Infrastructure' - oldest_article = 7 - language = 'en' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - needs_subscription = True - auto_cleanup = False - LOGIN = u'http://searchservervirtualization.techtarget.com/login' - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None: - br.open(self.LOGIN) - br.select_form(nr=1) - br['email'] = self.username - if self.password is not None: - br['password'] = self.password - br.submit() - return br - - keep_only_tags = [dict(name='div', attrs={'id': 'article'}), dict( - name='div', attrs={'class': 'entry'})] - remove_tags = [ - dict(name='div', attrs={'id': ['articleToolbar', 'relatedContent']})] - remove_tags_after = [dict(name='div', attrs={'id': 'relatedContent'})] - - feeds = [ - (u'IT news and analysis for CIOs', - u'http://feeds.pheedo.com/SearchCIOITNewsAndAnalysisForCIOs'), - (u'TotalCIO', u'http://feeds.pheedo.com/1532.xml'), - (u'SearchCIO-Midmarket: Technology news and tips for midmarket CIOs', - u'http://feeds.pheedo.com/techtarget/Searchsmb/Smbs'), - (u'Compliance news and advice for senior IT and business managers', - u'http://feeds.pheedo.com/tt/1200'), - (u'Server virtualization news and opinions', - u'http://feeds.pheedo.com/SearchservervirtualizationServerVirtualizationNewsAndOpinions'), - (u'The Virtualization Room', u'http://feeds.pheedo.com/techtarget/nzLe'), - (u'Server virtualization technical tips and expert advice', - u'http://feeds.pheedo.com/SearchservervirtualizationServerVirtualizationTechnicalTipsAndExpertAdvice'), - (u'Cloud Computing news and Technical Advice', - u'http://feeds.pheedo.com/1260'), - (u'IT infrastructure news', - u'http://feeds.pheedo.com/techtarget/Searchdatacenter/ItInfrastructure'), - (u'Storage Channel Update', - u'http://feeds.pheedo.com/ChannelMarker-TheItChannelWeblog'), - (u'VMware Tips and News', - u'http://feeds.pheedo.com/SearchvmwarecomVmwareTipsAndTricks'), - (u'Enterprise IT news roundup', - u'http://feeds.pheedo.com/WhatisEnterpriseItNewsRoundup'), - (u'WhatIs: Enterprise IT tips and expert advice', - u'http://feeds.pheedo.com/WhatisEnterpriseItTipsAndExpertAdvice'), - (u'WhatIs: Enterprise IT news roundup', - u'http://feeds.pheedo.com/WhatisEnterpriseItNewsRoundup'), - ] diff --git a/recipes/tedneward.recipe b/recipes/tedneward.recipe deleted file mode 100644 index b4039dea52..0000000000 --- a/recipes/tedneward.recipe +++ /dev/null @@ -1,30 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2011, Darko Miletic ' -''' -blogs.tedneward.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class InteroperabilityHappens(BasicNewsRecipe): - title = 'Interoperability Happens' - __author__ = 'Darko Miletic' - description = 'Tech blog by Ted Neward' - oldest_article = 15 - max_articles_per_feed = 100 - language = 'en' - encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = True - publication_type = 'blog' - extra_css = """ - body{font-family: Verdana,Arial,Helvetica,sans-serif} - """ - - conversion_options = { - 'comment': description, 'tags': 'blog, technology, microsoft, programming, C#, Java', 'publisher': 'Ted Neward', 'language': language - } - - feeds = [(u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss')] diff --git a/recipes/telam.recipe b/recipes/telam.recipe deleted file mode 100644 index 4fa81b0032..0000000000 --- a/recipes/telam.recipe +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2012-2016, Darko Miletic ' -''' -www.telam.com.ar -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Telam(BasicNewsRecipe): - title = 'Telam' - __author__ = 'Darko Miletic' - description = 'AGENCIA DE NOTICIAS DE LA REPUBLICA ARGENTINA' - publisher = 'Telam S.E.' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'windows-1252' - use_embedded_content = False - language = 'es_AR' - remove_empty_feeds = True - auto_cleanup = True - publication_type = 'newsportal' - PREFIX = 'http://www.telam.com.ar' - masthead_url = 'http://www.telam.com.ar/assets/img/logo.svg' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - img{margin-bottom: 0.4em; display:block} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [ - (u'Ultimas noticias', u'http://www.telam.com.ar/rss2/ultimasnoticias.xml'), - (u'Politica', u'http://www.telam.com.ar/rss2/politica.xml'), - (u'Economia', u'http://www.telam.com.ar/rss2/economia.xml'), - (u'Sociedad', u'http://www.telam.com.ar/rss2/sociedad.xml'), - (u'Policiales', u'http://www.telam.com.ar/rss2/policiales.xml'), - (u'Internacionales', u'http://www.telam.com.ar/rss2/internacional.xml'), - (u'Espectaculos', u'http://www.telam.com.ar/rss2/espectaculos.xml'), - (u'Cultura', u'http://www.telam.com.ar/rss2/cultura.xml'), - (u'Deportes', u'http://www.telam.com.ar/rss2/deportes.xml'), - (u'Educacion', u'http://www.telam.com.ar/rss2/educacion.xml') - ] - - def get_article_url(self, article): - url = BasicNewsRecipe.get_article_url(self, article) - if url.startswith('/'): - return self.PREFIX + url - return url diff --git a/recipes/telepolis_artikel.recipe b/recipes/telepolis_artikel.recipe deleted file mode 100644 index bc1c8bbad0..0000000000 --- a/recipes/telepolis_artikel.recipe +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, Gerhard Aigner ' - -''' http://www.derstandard.at - Austrian Newspaper ''' -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TelepolisArtikel(BasicNewsRecipe): - title = u'Telepolis (Artikel)' - __author__ = 'Gerhard Aigner' - publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' - category = 'news' - description = 'Telepolis Artikel' - language = 'de_AT' - oldest_article = 7 - max_articles_per_feed = 100 - recursion = 0 - no_stylesheets = True - - use_embedded_content = False - remove_empty_feeds = True - - remove_tags_before = dict(name='h1') - remove_tags = [dict(name='img')] - - feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')] - - preprocess_regexps = [(re.compile(r']*>', re.DOTALL | re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL | re.IGNORECASE), lambda match: ''), ] - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher] - - html2epub_options = 'publisher="' + publisher + \ - '"\ncomments="' + description + '"\ntags="' + category + '"' - - def print_version(self, url): - p = re.compile(r'\d{5}', re.DOTALL | re.IGNORECASE) - m = p.search(url) - return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr=" + m.group() + "&mode=print" diff --git a/recipes/thai_post_daily.recipe b/recipes/thai_post_daily.recipe deleted file mode 100644 index 1a04b797b8..0000000000 --- a/recipes/thai_post_daily.recipe +++ /dev/null @@ -1,40 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1299054026(BasicNewsRecipe): - title = u'Thai Post Daily' - __author__ = 'Chotechai P.' - language = 'th' - oldest_article = 7 - max_articles_per_feed = 100 - cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png' - feeds = [ - (u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'), - (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'), - (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'), - (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'), - (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'), - (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'), - (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'), - (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'), - (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'), - (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'), - (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'), - (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'), - (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'), - (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'), - (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'), - (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'), - (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'), - (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'), - (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'), # noqa - (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'), - (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed')] - - def print_version(self, url): - return url.replace(url, 'http://www.thaipost.net/print/' + url[32:]) - - remove_tags = [] - remove_tags.append(dict(name='div', attrs={'class': 'print-logo'})) - remove_tags.append(dict(name='div', attrs={'class': 'print-site_name'})) - remove_tags.append(dict(name='div', attrs={'class': 'print-breadcrumb'})) diff --git a/recipes/the_daily_news_egypt.recipe b/recipes/the_daily_news_egypt.recipe deleted file mode 100644 index e7cf75239c..0000000000 --- a/recipes/the_daily_news_egypt.recipe +++ /dev/null @@ -1,46 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Pat Stapleton ' -''' -abc.net.au/news -''' -import re - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class TheDailyNewsEG(BasicNewsRecipe): - title = u'The Daily News Egypt' - __author__ = 'Omm Mishmishah' - description = 'News from Egypt' - masthead_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif' - cover_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif' - - auto_cleanup = True - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = False - use_embedded_content = False - encoding = 'utf8' - publisher = 'The Daily News Egypt' - category = 'News, Egypt, World' - language = 'en_EG' - publication_type = 'newsportal' -# preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] -# Remove annoying map links (inline-caption class is also used for some -# image captions! hence regex to match maps.google) - preprocess_regexps = [(re.compile( - r' 182 - compress_news_images = True - compress_news_images_max_size = 100 - cover_url = 'http://www.thecodelesscode.com/pages/case-9/Lotus-050.jpg' - credits = [u'

{0}

'.format(title), - u'

By Qi

', - u'

An illustrated collection of (sometimes violent) fables concerning the Art and Philosophy of software development, written in the spirit of Zen kōans

', # noqa - u'

eBook conversion courtesy of {0}

'.format(__author__)] - description = u'The Art and Philosophy of software development, written in the spirit of Zen kōans' - extra_css = '.article_date { display: none; float: right; } \ - .chapter_title { font-size: 1.75em; margin-top: 0; } \ - .chapter_title::first-letter { font-size: 1.35em; font-weight: 500; letter-spacing: -.05em; } \ - h2 { margin-top: 0; } \ - .image_wrapper { text-align: center; }' - index = 'http://www.thecodelesscode.com/contents' - language = 'en' - max_articles_per_feed = 1000 # I can only wish - path_remappings = {} # IE, /case/182 -> articles_72/index.html - publication_type = 'blog' - publisher = 'Qi' - resolve_internal_links = True - scale_news_images = (600, 400) - simultaneous_downloads = 1 - url = 'http://www.thecodelesscode.com' - - def parse_index(self): - koans = [] - - # Retrieve the contents page, containing the ToC - soup = self.index_to_soup(self.index) - - for koan in soup.findAll('tr'): - # BS has some trouble with the weird layout - tag = koan.find('a') - - if tag is None: - continue - if 'random' in tag['href']: - continue - - # Minor coding error causes calibre to glitch; use the current date - # for the most recent title - koan_date = koan.find('td', attrs={'class': 'toc-date'}) - if koan_date is None: - koan_date = date.isoformat(date.today()) - else: - koan_date = koan_date.string - - title = tag.string - url = self.url + tag['href'] - - if u'The Applicant' in title: - continue # Only the main story - - koans.append({ - 'content': '', - 'date': koan_date, - 'description': '', - 'title': title, - 'url': url, - }) - - # ie, Mousetrap -> 182 - self.chapters[title] = url.split('/')[-1] - - # Oldest koans first - koans.reverse() - - # Log and then get out of here - self.log("Found {0} koans".format(len(koans))) - return([(self.title, koans)]) - - def preprocess_html(self, soup): - title = soup.find('h1', attrs={'class': 'title'}).find( - 'a', attrs={'class': 'subtle'}).string - - # Add a title at the beginning of each chapter - if title in self.chapters: - title = '
{0}
'.format(title) - - # Load up the actual story - koan = soup.find('div', attrs={'class': 'story koan'}) - - # Kind of a hack-y way to get .children in BS3
- # -> - contents = list(koan.contents) - koan = bs(title) - - for i in reversed(contents): - koan.insert(1, i) - - # Remove all anchors that don't contain /case/, leaving them as just their text - # Note that we'll come back and clean up /case/ links when the URLs are remapped - # during postprocess_book() - anchors = koan.findAll('a') - if anchors != []: - for anchor in anchors: - if '/case/' in anchor['href']: - pass - elif 'note' in anchor['href']: - anchor.replaceWith('') - else: - # Again, a hacky way to get the contents of the tag, thanks - # to BS3 - contents = list(anchor.contents) - linktext = bs() - for i in reversed(contents): - linktext.insert(1, i) - anchor.replaceWith(linktext) - - # Find all the images, and wrap them up in an image_wrapper div - for i in range(0, len(koan.contents), 1): - if not hasattr(koan.contents[i], 'name'): - continue # skip carriage returns - if koan.contents[i].name == u'img': - div = bs('
') - div.div.insert(0, koan.contents[i]) - koan.insert(i, div) - - return(koan) - - def canonicalize_internal_url(self, url, is_link=True): - url = url.split(self.url)[-1] - return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link) - - def postprocess_book(self, oeb, opts, log): - # Go through each internal representation of each HTML file, and fix - # all the broken hrefs, if possible - for item in oeb.manifest.items: - if item.media_type == 'text/html': - - for node in item.data.xpath('//*[@href]'): - naughty_href = node.get('href') - - if naughty_href in self.path_remappings: - node.set('href', '../' + - self.path_remappings[naughty_href]) - href = node.get('href') - self.log( - "Remapped href {0} --> {1}".format(naughty_href, href)) - - # Remove the superfluous extra feed page at the beginning of the book, replacing it - # with the proper credits - for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="ul"]'): - item.getparent().remove(item) - - for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="p"]'): - item.getparent().remove(item) - - for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="div"]'): - for credit in self.credits[::-1]: - item.insert(0, etree.fromstring(credit, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))) - - # Change the creator from "calibre" to the actual author - # Also, we don't need the date in the ebook's title - oeb.metadata.items['creator'][0].value = self.publisher - oeb.metadata.items['description'][0].value = oeb.metadata.items[ - 'description'][0].value.split('\n\nArticles in this issue')[0] - oeb.metadata.items['publication_type'][0].value = self.title - oeb.metadata.items['publisher'][0].value = self.publisher - oeb.metadata.items['title'][0].value = self.title diff --git a/recipes/thedgesingapore.recipe b/recipes/thedgesingapore.recipe deleted file mode 100644 index be20a26ba6..0000000000 --- a/recipes/thedgesingapore.recipe +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.livemint.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Edgesingapore(BasicNewsRecipe): - title = 'The Edge Singapore' - __author__ = 'Darko Miletic' - description = 'Financial news from Singapore' - publisher = 'The Edge Singapore' - category = 'news, finances, singapore' - language = 'en' - - lang = 'en_SG' - oldest_article = 15 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - extra_css = ' .contentheading{font-size: x-large} .small{font-size: small} .createdate{font-size: small; font-weight: bold} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'author': publisher, 'language': lang, 'pretty_print': True, 'linearize_tables': True - } - - remove_tags = [ - dict(name=['object', 'link', 'embed', 'form', 'iframe']), dict(name='div', attrs={ - 'id': 'toolbar-article'}), dict(name='div', attrs={'class': 'backtotop'}), dict(name='img', attrs={'alt': 'Print'}) - ] - - remove_tags_after = dict(name='div', attrs={'class': 'backtotop'}) - - feeds = [(u'Articles', u'http://feeds.feedburner.com/edgesg')] - - def print_version(self, url): - return url + '?tmpl=component&print=1' - - def preprocess_html(self, soup): - attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' # noqa - ] - for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): - item.name = 'div' - for attrib in attribs: - item[attrib] = '' - del item[attrib] - return self.adeify_images(soup) diff --git a/recipes/theluminouslandscape.recipe b/recipes/theluminouslandscape.recipe deleted file mode 100644 index b796b3052a..0000000000 --- a/recipes/theluminouslandscape.recipe +++ /dev/null @@ -1,34 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -luminous-landscape.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class theluminouslandscape(BasicNewsRecipe): - title = 'The Luminous Landscape' - __author__ = 'Darko Miletic' - description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.' # noqa - publisher = 'The Luminous Landscape ' - category = 'news, blog, photograph, international' - oldest_article = 15 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - use_embedded_content = True - encoding = 'cp1252' - language = 'en' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [ - (u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')] - remove_tags = [dict(name=['object', 'link', 'iframe'])] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/themarketticker.recipe b/recipes/themarketticker.recipe deleted file mode 100644 index 4a5fd1196e..0000000000 --- a/recipes/themarketticker.recipe +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -market-ticker.denninger.net -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Themarketticker(BasicNewsRecipe): - title = 'The Market Ticker' - __author__ = 'Darko Miletic' - description = 'Commentary On The Capital Markets' - oldest_article = 7 - max_articles_per_feed = 100 - language = 'en' - - no_stylesheets = True - use_embedded_content = True - html2lrf_options = ['--comment', description, '--category', 'blog,news,finances', '--base-font-size', '10' - ] - feeds = [(u'Posts', u'http://market-ticker.denninger.net/feeds/index.rss2')] diff --git a/recipes/themorningpaper.recipe b/recipes/themorningpaper.recipe deleted file mode 100644 index 0cf4263873..0000000000 --- a/recipes/themorningpaper.recipe +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2017, Darko Miletic ' -''' -blog.acolyer.org -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Themorningpaper(BasicNewsRecipe): - title = 'The Morning Paper' - __author__ = 'Darko Miletic' - description = ('an interesting/influential/important paper from' - ' the world of CS every weekday morning, as selected by Adrian Colyer') - publisher = 'Adrian Colyer' - category = 'news, tech' - oldest_article = 180 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - auto_cleanup = True - publication_type = 'blog' - extra_css = """ - body{font-family: Georgia,Palatino,serif } - img{margin-bottom: 0.4em; display:block} - """ - - conversion_options = { - 'comment': description, - 'tags': category, - 'publisher': publisher, - 'language': language - } - - feeds = [(u'Articles', u'https://blog.acolyer.org/feed/')] diff --git a/recipes/thenews.recipe b/recipes/thenews.recipe deleted file mode 100644 index 6d71543cac..0000000000 --- a/recipes/thenews.recipe +++ /dev/null @@ -1,88 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheNewsRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en_PK' - version = 1 - - title = u'The News' - publisher = u'Jang Group' - category = u'News, Pakistan' - description = u'English Newspaper from Pakistan' - - use_embedded_content = False - remove_empty_feeds = True - oldest_article = 2 - max_articles_per_feed = 100 - - no_stylesheets = True - remove_javascript = True - encoding = 'iso-8859-1' - - remove_tags = [] - remove_tags.append(dict(name='img', attrs={'src': 'images/thenews.gif'})) - remove_tags.append(dict(name='img', attrs={'src': 'images/shim.gif'})) - - # Feeds from http://thenews.com.pk/rss.asp - feeds = [] - feeds.append( - (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml')) - feeds.append( - (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml')) - feeds.append( - (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml')) - feeds.append( - (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml')) - feeds.append( - (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml')) - feeds.append( - (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml')) - feeds.append( - (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml')) - feeds.append( - (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml')) - feeds.append( - (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml')) - feeds.append( - (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml')) - feeds.append( - (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml')) - feeds.append( - (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml')) - feeds.append( - (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml')) - - conversion_options = {'comments': description, 'tags': category, 'language': 'en', - 'publisher': publisher, 'linearize_tables': True} - - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif;} - .heading_txt {font-size: x-large; font-weight: bold; text-align: left;} - .small_txt {text-align: left;} - .dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em} - ''' - - def print_version(self, url): - ignore, sep, main = url.rpartition('/') - - if main.startswith('updates.asp'): - return url.replace('updates.asp', 'print.asp') - elif main.startswith('top_story_detail.asp'): - return url.replace('top_story_detail.asp', 'print3.asp') - elif main.startswith('daily_detail.asp'): - return url.replace('daily_detail.asp', 'print1.asp') - else: - return None - - def preprocess_html(self, soup): - for tr in soup.findAll('tr', attrs={'bgcolor': True}): - del tr['bgcolor'] - - td = soup.find('td', attrs={'class': 'small_txt', 'height': '20'}) - if td: - del td['height'] - td['class'] = 'dateline' - - return soup diff --git a/recipes/theoldfoodie.recipe b/recipes/theoldfoodie.recipe deleted file mode 100644 index 744ab4d1d3..0000000000 --- a/recipes/theoldfoodie.recipe +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.theoldfoodie.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheOldFoodie(BasicNewsRecipe): - title = 'The Old Foodie' - __author__ = 'Darko Miletic' - description = 'Food blog' - category = 'cuisine, food, blog' - oldest_article = 30 - max_articles_per_feed = 100 - use_embedded_content = True - no_stylesheets = True - encoding = 'utf-8' - language = 'en' - - conversion_options = { - 'comments': description, 'tags': category, 'language': 'en' - } - - feeds = [ - (u'Articles', u'http://www.theoldfoodie.com/feeds/posts/default?alt=rss')] diff --git a/recipes/theonion.recipe b/recipes/theonion.recipe deleted file mode 100644 index af97c0169a..0000000000 --- a/recipes/theonion.recipe +++ /dev/null @@ -1,89 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2013, Darko Miletic ' - -''' -theonion.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheOnion(BasicNewsRecipe): - title = 'The Onion' - __author__ = 'Darko Miletic' - description = "The Onion, America's Finest News Source, is an award-winning publication covering world, national, and * local issues. It is updated daily online and distributed weekly in select American cities." # noqa - oldest_article = 2 - max_articles_per_feed = 100 - publisher = 'Onion, Inc.' - category = 'humor, news, USA' - language = 'en' - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - publication_type = 'newsportal' - needs_subscription = 'optional' - masthead_url = 'http://www.theonion.com/static/onion/img/logo_1x.png' - cover_url = 'http://www.theonion.com/static/onion/img/logo_1x.png' - extra_css = """ - body{font-family: Helvetica,Arial,sans-serif} - .section_title{color: gray; text-transform: uppercase} - .title{font-family: Georgia,serif} - .meta{color: gray; display: inline} - .has_caption{display: block} - .caption{font-size: x-small; color: gray; margin-bottom: 0.8em} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(attrs={'class': lambda x: x and 'content-wrapper' in x.split()})] - remove_attributes = ['lang', 'rel'] - remove_tags = [ - dict(name=['object', 'link', 'iframe', 'base', 'meta', 'button', 'footer', 'blockquote', 'figcaption']), dict(attrs={'class': lambda x: x and 'share-tools' in x.split()}), dict(attrs={'class': lambda x: x and 'content-meta' in x.split()}), dict(attrs={'class': 'below-article-tools'}), dict(name='div', attrs={'id': ['topshare', 'bottomshare']}) # noqa - ] - - feeds = [ - (u'Daily', u'http://feeds.theonion.com/theonion/daily'), (u'Sports', - u'http://feeds.theonion.com/theonion/sports') - ] - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.theonion.com/') - if self.username is not None and self.password is not None: - br.open('https://ui.ppjol.com/login/onion/u/j_spring_security_check') - br.select_form(name='f') - br['j_username'] = self.username - br['j_password'] = self.password - br.submit() - return br - - def get_article_url(self, article): - artl = BasicNewsRecipe.get_article_url(self, article) - if artl.startswith('http://www.theonion.com/audio/'): - artl = None - return artl - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - item.name = 'div' - item.attrs = [] - if not limg.get('alt'): - limg['alt'] = 'image' - else: - str = self.tag_to_string(item) - item.replaceWith(str) - for item in soup.findAll('img'): - if item.get('data-src'): - item['src'] = item['data-src'] - return soup diff --git a/recipes/thewest_au.recipe b/recipes/thewest_au.recipe deleted file mode 100644 index a035a2d6b6..0000000000 --- a/recipes/thewest_au.recipe +++ /dev/null @@ -1,62 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -thewest.com.au -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheWest(BasicNewsRecipe): - title = 'The West Australian' - __author__ = 'Darko Miletic' - description = 'News from Australia' - publisher = 'thewest.com.au' - category = 'news, politics, Australia' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en_AU' - remove_empty_feeds = True - publication_type = 'newspaper' - masthead_url = 'http://l.yimg.com/ao/i/mp/properties/news/02/wan/img/wan-logo-h49.png' - extra_css = ' .article{font-family: Arial,Helvetica,sans-serif } .image{font-size: x-small} ' - - preprocess_regexps = [ - (re.compile(r'.*?', re.DOTALL | - re.IGNORECASE), lambda match: '') - ] - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(attrs={'class': ['tools', 'lhs']}), dict(attrs={ - 'id': 'tools-bottom'}), dict(attrs={'href': 'http://twitter.com/thewest_com_au'}) - ] - keep_only_tags = [dict(attrs={'class': 'mod article'})] - remove_attributes = ['width', 'height'] - - feeds = [ - - (u'WA News', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/wa.xml'), - (u'National', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/national.xml'), - (u'World', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/world.xml'), - (u'Offbeat', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/offbeat.xml'), - (u'Business', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/business.xml'), - (u'Sport', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/sport.xml'), - (u'Entertainment', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/entertainment.xml'), - (u'Travel', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/travel.xml'), - (u'Life+Style', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/lifestyle.xml') - ] - - def get_article_url(self, article): - return article.get('guid', None) - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/think_progress.recipe b/recipes/think_progress.recipe deleted file mode 100644 index 8d1a4b7924..0000000000 --- a/recipes/think_progress.recipe +++ /dev/null @@ -1,13 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1263409732(BasicNewsRecipe): - title = u'Think Progress' - description = u'A compilation of progressive articles on social and economic justice, healthy communities, media accountability, global and domestic security.' # noqa - __author__ = u'Xanthan Gum' - language = 'en' - - oldest_article = 7 - max_articles_per_feed = 100 - - feeds = [(u'News Articles', u'http://thinkprogress.org/feed/')] diff --git a/recipes/thn.recipe b/recipes/thn.recipe deleted file mode 100644 index efd0e183f9..0000000000 --- a/recipes/thn.recipe +++ /dev/null @@ -1,19 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1289990851(BasicNewsRecipe): - title = u'The Hockey News' - language = 'en_CA' - __author__ = 'Nexus' - oldest_article = 7 - max_articles_per_feed = 25 - no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class': 'article_info'}), - dict(name='div', attrs={'class': 'photo_details'}), - dict(name='div', attrs={'class': 'tool_menu'}), - dict(name='div', attrs={'id': 'comments_container'}), - dict(name='div', attrs={'id': 'wrapper'})] - keep_only_tags = [dict(name='h1', attrs={'class': ['headline']}), - dict(name='div', attrs={'class': ['box_container']})] - - feeds = [(u'THN', u'http://www.thehockeynews.com/rss/all_categories.xml')] diff --git a/recipes/tidbits.recipe b/recipes/tidbits.recipe deleted file mode 100644 index d2e9af8574..0000000000 --- a/recipes/tidbits.recipe +++ /dev/null @@ -1,51 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -db.tidbits.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TidBITS(BasicNewsRecipe): - title = 'TidBITS: Mac News for the Rest of Us' - __author__ = 'Darko Miletic' - description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds' - publisher = 'TidBITS Publishing Inc.' - category = 'news, Apple, Macintosh, IT, Internet' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - masthead_url = 'http://db.tidbits.com/images/tblogo9.gif' - extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [dict(name='div', attrs={'id': 'center_ajax_sub'})] - remove_tags = [dict(name='div', attrs={'id': 'social-media'})] - - feeds = [ - - (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'), - (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'), - (u'External Links', u'http://db.tidbits.com/feeds/links.rss'), - (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'), - (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'), - (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'), - (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'), - (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'), - (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'), - (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'), - (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'), - (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'), - (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'), - (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'), - (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss') - ] diff --git a/recipes/tijolaco.recipe b/recipes/tijolaco.recipe deleted file mode 100644 index bd2200c172..0000000000 --- a/recipes/tijolaco.recipe +++ /dev/null @@ -1,25 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class Tijolaco(BasicNewsRecipe): - title = u'Tijolaco.com' - __author__ = u'Diniz Bortolotto' - description = u'Posts do Blog Tijola\xe7o.com' - oldest_article = 7 - max_articles_per_feed = 50 - encoding = 'utf8' - publisher = u'Brizola Neto' - category = 'politics, Brazil' - language = 'pt_BR' - publication_type = 'politics portal' - use_embedded_content = False - no_stylesheets = True - remove_javascript = True - - feeds = [(u'Blog Tijola\xe7o.com', u'http://feeds.feedburner.com/Tijolacoblog')] - - reverse_article_order = True - - keep_only_tags = [dict(name='div', attrs={'class': 'post'})] - - remove_tags = [dict(name='span', attrs={'class': 'com'})] diff --git a/recipes/time_turk.recipe b/recipes/time_turk.recipe deleted file mode 100644 index 4a47fdee3b..0000000000 --- a/recipes/time_turk.recipe +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1325259641(BasicNewsRecipe): - language = 'tr' - __author__ = 'asalet_r' - title = u'TimeT\xfcrk' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - - feeds = [(u'TimeT\xfcrk', u'http://www.timeturk.com/tr/rss/')] diff --git a/recipes/timesnewroman.recipe b/recipes/timesnewroman.recipe deleted file mode 100644 index 8ce8a56f6f..0000000000 --- a/recipes/timesnewroman.recipe +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -timesnewroman.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TimesNewRoman(BasicNewsRecipe): - title = u'Times New Roman' - __author__ = u'Silviu Cotoar\u0103' - description = u'Cotidian independent de umor voluntar' - publisher = u'Times New Roman' - oldest_article = 25 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Fun' - encoding = 'utf-8' - cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'id': 'page'}) - ] - - remove_tags = [ - dict(name='p', attrs={'class': ['articleinfo']}), dict(name='div', attrs={'class': ['shareTools']}), dict( - name='div', attrs={'class': 'fb_iframe_widget'}), dict(name='div', attrs={'id': 'jc'}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': 'fb_iframe_widget'}), - dict(name='div', attrs={'id': 'jc'}) - ] - - feeds = [ - (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/tnxm.recipe b/recipes/tnxm.recipe deleted file mode 100644 index f2b84ca7a0..0000000000 --- a/recipes/tnxm.recipe +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Wasabi ' -''' -tnxm.net -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TNXM(BasicNewsRecipe): - title = u'Thanh Nien Xa Me' - __author__ = 'Wasabi' - description = 'Vietnam news and current affairs from TNXM - the finest Vietnamese bulletin board.' - no_stylesheets = True - language = 'vi' - - encoding = 'utf-8' - recursions = 0 - - remove_tags = [dict(name='div', attrs={'class': 'footer'})] - extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }' - - feeds = [ - ('Index', 'http://tnxm.net/external.php?type=RSS'), - ] - - def print_version(self, url): - return url.replace('showthread.php?', 'printthread.php?pp=160&') diff --git a/recipes/today_online.recipe b/recipes/today_online.recipe deleted file mode 100644 index bce8d75c5b..0000000000 --- a/recipes/today_online.recipe +++ /dev/null @@ -1,63 +0,0 @@ -from calibre.ptempfile import PersistentTemporaryFile -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1276486274(BasicNewsRecipe): - title = u'Today Online - Singapore' - publisher = 'MediaCorp Press Ltd - Singapore' - __author__ = 'rty' - category = 'news, Singapore' - oldest_article = 7 - max_articles_per_feed = 100 - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - language = 'en_SG' - temp_files = [] - articles_are_obfuscated = True - masthead_url = 'http://www.todayonline.com/sites/all/themes/today/logo.png' - conversion_options = {'linearize_tables': True} - extra_css = ''' - .author{font-style: italic; font-size: small} - .date{font-style: italic; font-size: small} - .Headline{font-weight: bold; font-size: xx-large} - .headerStrap{font-weight: bold; font-size: x-large; font-syle: italic} - .bodyText{font-size: 4px;font-family: Times New Roman;} - ''' - feeds = [ - (u'Hot News', u'http://www.todayonline.com/hot-news/feed'), - (u'Singapore', u'http://www.todayonline.com/feed/singapore'), - (u'World', u'http://www.todayonline.com/feed/world'), - (u'Business', u'http://www.todayonline.com/feed/business'), - (u'Tech', u'http://www.todayonline.com/feed/tech'), - (u'Voices', u'http://www.todayonline.com/feed/voices'), - (u'Commentary', u'http://www.todayonline.com/feed/Commentary'), - (u'Daily Focus', u'http://www.todayonline.com/feed/daily-focus'), - (u'Lifestyle', u'http://www.todayonline.com/feed/lifestyle'), - ] - keep_only_tags = [ - dict(name='div', attrs='print-content') - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['url', 'button']}), - dict(name='div', attrs={'class': 'node-type-print-edition'}), - dict(name='div', attrs={'class': ['field field-name-field-article-section field-type-taxonomy-term-reference field-label-hidden', - 'field field-name-field-article-abstract field-type-text-long field-label-hidden', 'authoring']}) - - ] - - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url) - response = br.follow_link(url_regex=r'/print/', nr=0) - html = response.read() - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/todays_zaman.recipe b/recipes/todays_zaman.recipe deleted file mode 100644 index 3058fb5083..0000000000 --- a/recipes/todays_zaman.recipe +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import print_function - -__license__ = 'GPL v3' -__copyright__ = '2014, spswerling' -''' -www.todayszaman.com -''' -import re - -from calibre.web.feeds.recipes import BasicNewsRecipe - -try: - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin - - -class TodaysZaman(BasicNewsRecipe): - - title = u'Todays Zaman' - __author__ = u'spswerling' - description = 'English version of Turkish Daily "Zaman"' - max_articles_per_feed = 100 - encoding = 'utf-8' - category = 'news' - language = 'en_TR' - publication_type = 'newspaper' - cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/todays_yenilogo.bmp' # yep, bmp - masthead_url = cover_img_url - remove_empty_feeds = True - - # on kindle, images can make things kind of fat. Slim them down. - recursions = 0 - oldest_article = 1.5 - compress_news_images = True - compress_news_images_max_size = 7 - scale_news_images = (150, 200) # (kindle touch: 600x800) - useHighResImages = False - - sections = [ - (u'Columnists', u'columnists'), - (u'Opinion', u'op-ed'), - (u'World', u'world'), - (u'National', u'national'), - (u'Diplomacy', u'diplomacy'), - (u'Business', u'business'), - ] - - # util for creating remove_tags and keep_tags style regex matchers - def tag_matcher(elt, attr, str): - return dict(name=elt, attrs={attr: re.compile(str, re.IGNORECASE)}) - - keep_only_tags = [ - tag_matcher('div', 'class', '^pageNewsDetailContainer$'), - tag_matcher('div', 'class', '^pageColumnistDetailContainer$'), - ] - - remove_tags = [ - tag_matcher('div', 'class', 'DetailKeyword'), - tag_matcher('div', 'class', 'MainContentSocial'), - tag_matcher('div', 'class', 'SocialNetwork'), - tag_matcher('div', 'class', 'DetailLeftOther'), - tag_matcher('div', 'class', 'RelatedNews'), - tag_matcher('div', 'class', '^topMenuWrapper$'), - tag_matcher('div', 'class', '^logo$'), - tag_matcher('a', 'class', 'cf_email'), - ] - articles = {} - - def parse_index(self): - for (sect_title, sect_uri) in self.sections: - self.parse_section(sect_title, sect_uri) - - ans = [] - for k in self.articles: - ans.append((k, self.articles[k])) - return ans - - def parse_section(self, sect_title, sect_uri): - url = 'http://www.todayszaman.com/' + sect_uri - print('Start section ' + sect_title + ', ' + url) - try: - soup = self.index_to_soup(url) - except: - return - - # Find each article - for div in soup.findAll('div'): - div_class = div.get('class') - if div_class: - if div_class in ['pageColumnistsMainContent', - 'pageCategoryContainer']: - # print ' DIVCLASS' + div_class - for link in div.findAll('a', href=True): - self.process_link(sect_title, div_class, link) - - print('Finished section: ' + sect_title) - - def process_link(self, section_title, layout, link): - def p(s): - print('[PROCESS LINK] ' + s[0:80]) - - href = link['href'] - full_href = urljoin('http://www.todayszaman.com/', href) - next_sib = link.nextSibling - child_h2 = link.find('h2') - link_text = self.tag_to_string(link).strip() - title_node = None - - if layout in ['pageColumnistsMainContent']: - if child_h2: - title_node = child_h2 - else: - return - elif layout in ['pageCategoryContainer']: - top_title = link.find(attrs={'class': 'pageCategoryTopTitle'}) - if top_title: - title_node = top_title - elif (not link_text) and (next_sib and next_sib.find('h4')): - title_node = next_sib.find('h4') - elif (not link_text) and (next_sib and next_sib.find('h3')): - title_node = next_sib.find('h3') - elif link_text: - title_node = link - - if title_node: - title = self.tag_to_string(title_node) - # print ' BING: ' + href + ', ' + title - self.queue_article_link(section_title, full_href, title) - - def queue_article_link(self, section, url, title): - if section not in self.articles: - self.articles[section] = [] - self.articles[section].append( - dict(title=title, - url=url, - date='', - description='', - author='', - content='')) - - def populate_article_metadata(self, article, soup, first): - - def p(s): - print('[POPULATE METADATA] ' + s[0:80]) - - tnode = soup.find('title') - if tnode: - tstring = self.tag_to_string(tnode) - if ' - ' in tstring: - author = tstring.split('-')[0] - if author: - article.author = author - article.title = author + ' - ' + article.title.strip() - p('Add author to title:' + author) - - # known matches: pageNewsDetailDate, pageColumnistDetailLeftDate - regex = re.compile('(DetailDate|DetailLeftDate)$', re.IGNORECASE) - date_node = soup.find('div', {'class': regex}) - if date_node: - date = self.tag_to_string(date_node).__str__().split('/')[0] - date = ','.join(date.split(',')[:2]).strip() - article.title = date + ' - ' + article.title.strip() - article.date = date - p('Add date to title: ' + date) - - strong = soup.find('strong') - if strong: - article.text_summary = self.tag_to_string(strong) - p('Summary: ' + article.text_summary) - - def _dbg_soup_node(self, node): - s = ' cls: ' + node.get('class').__str__().strip() + \ - ' txt: ' + self.tag_to_string(node).strip() - return s diff --git a/recipes/tomshardware.recipe b/recipes/tomshardware.recipe deleted file mode 100644 index f8e4ef959e..0000000000 --- a/recipes/tomshardware.recipe +++ /dev/null @@ -1,73 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008-2013, Darko Miletic ' -''' -tomshardware.com/us -''' - -try: - from urllib.parse import urlencode -except ImportError: - from urllib import urlencode -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class Tomshardware(BasicNewsRecipe): - title = "Tom's Hardware US" - __author__ = 'Darko Miletic' - description = 'Hardware reviews and News' - publisher = "Tom's Hardware" - category = 'news, IT, hardware, USA' - no_stylesheets = True - needs_subscription = 'optional' - language = 'en' - INDEX = 'http://www.tomshardware.com' - LOGIN = INDEX + '/membres/' - remove_javascript = True - use_embedded_content = False - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open(self.INDEX + '/us/') - if self.username is not None and self.password is not None: - data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password - }) - br.open(self.LOGIN, data) - return br - - remove_tags = [ - dict(name='div', attrs={'id': 'header'}), dict(name='object') - ] - - feeds = [ - - (u'Reviews', u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-2.xml'), - (u'News', u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-1.xml') - ] - - def print_version(self, url): - main, sep, rest = url.rpartition('.html') - rmain, rsep, article_id = main.rpartition(',') - tmain, tsep, trest = rmain.rpartition('/reviews/') - rind = 'http://www.tomshardware.com/news_print.php?p1=' - if tsep: - rind = 'http://www.tomshardware.com/review_print.php?p1=' - return rind + article_id - - def cleanup_image_tags(self, soup): - for item in soup.findAll('img'): - for attrib in ['height', 'width', 'border', 'align']: - item[attrib] = '' - del item[attrib] - return soup - - def preprocess_html(self, soup): - del(soup.body['onload']) - for item in soup.findAll(style=True): - del item['style'] - for it in soup.findAll('span'): - it.name = "div" - return self.cleanup_image_tags(soup) diff --git a/recipes/tomshardware_de.recipe b/recipes/tomshardware_de.recipe deleted file mode 100644 index 140b9a2a91..0000000000 --- a/recipes/tomshardware_de.recipe +++ /dev/null @@ -1,58 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -''' -Fetch tomshardware. -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class cdnet(BasicNewsRecipe): - - title = 'tomshardware' - description = 'computer news in german' - __author__ = 'Oliver Niesner' - use_embedded_content = False - timefmt = ' [%d %b %Y]' - max_articles_per_feed = 50 - no_stylesheets = True - encoding = 'utf-8' - language = 'de' - - remove_tags = [dict(id='outside-advert'), - dict(id='advertRightWhite'), - dict(id='header-advert'), - dict(id='header-banner'), - dict(id='header-menu'), - dict(id='header-top'), - dict(id='header-tools'), - dict(id='nbComment'), - dict(id='commentTools'), - dict(id='internalSidebar'), - dict(id='header-news-infos'), - dict(id='header-news-tools'), - dict(id='breadcrumbs'), - dict(id='emailTools'), - dict(id='bookmarkTools'), - dict(id='printTools'), - dict(id='header-nextNews'), - dict(id='commentsBox'), - dict(id='showComments'), - dict(id='footer'), - dict(id=''), - dict(name='div', attrs={'class': 'pyjama'}), - dict(name='div', attrs={'class': 'basicCentral'}), - dict(name='li', attrs={ - 'class': 'simplePagination-previous'}), - dict(name='form', attrs={'id': 'commentForm'}), - dict(name='href', attrs={'class': 'comment'}), - dict(name='div', attrs={'class': 'greyBoxR clearfix'}), - dict(name='div', attrs={'class': 'greyBoxL clearfix'}), - dict(name='div', attrs={'class': 'greyBox clearfix'}), - dict(name='div', attrs={'class': 'labelized'}), - dict(id='')] - remove_tags_after = [dict(name='div', attrs={'class': 'labelized'})] - - feeds = [ - ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml')] diff --git a/recipes/tomshardware_it.recipe b/recipes/tomshardware_it.recipe deleted file mode 100644 index f366400055..0000000000 --- a/recipes/tomshardware_it.recipe +++ /dev/null @@ -1,26 +0,0 @@ -__license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1327434170(BasicNewsRecipe): - title = u"Tom's Hardware" - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - masthead_url = 'http://userlogos.org/files/logos/spaljeni/tomshardwre.png' - - def get_article_url(self, article): - link = BasicNewsRecipe.get_article_url(self, article) - if link.split('/')[-1] == "story01.htm": - link = link.split('/')[-2] - a = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L', 'N', 'S'] - b = ['0', '.', '/', '?', '-', '=', '&', - '_', 'http://', '.com', 'www.'] - for i in range(0, len(a)): - link = link.replace('0' + a[-i], b[-i]) - return link - feeds = [ - (u"Tom's Hardware", u'http://rss.feedsportal.com/c/32604/f/531080/index.rss')] - __author__ = 'faber1971' - description = 'Italian website on technology - v1.00 (28, January 2012)' - language = 'it' diff --git a/recipes/toronto_sun.recipe b/recipes/toronto_sun.recipe deleted file mode 100644 index 7ecacd1f24..0000000000 --- a/recipes/toronto_sun.recipe +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.torontosun.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TorontoSun(BasicNewsRecipe): - title = 'Toronto SUN' - __author__ = 'Darko Miletic and Sujata Raman' - description = 'News from Canada' - publisher = 'Toronto Sun' - category = 'news, politics, Canada' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - language = 'en_CA' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(name='div', attrs={'class': ['articleHead', 'leftBox']}), dict(name='div', attrs={ - 'id': 'channelContent'}), dict(name='div', attrs={'id': 'rotateBox'}), dict(name='img') - ] - remove_tags = [ - dict(name='div', attrs={'class': ['bottomBox clear', 'bottomBox', 'breadCrumb', 'articleControls thin', 'articleControls thin short', 'extraVideoList']}), dict(name='h2', attrs={'class': 'microhead'}), dict(name='div', attrs={'id': 'commentsBottom'}), dict(name=['link', 'iframe', 'object']), dict(name='a', attrs={'rel': 'swap'}), dict(name='a', attrs={'href': '/news/haiti/'}), dict(name='ul', attrs={'class': ['tabs dl contentSwap', 'micrositeNav clearIt hList', 'galleryNav rotateNav']}) # noqa - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': 'bottomBox clear'}), dict(name='div', attrs={ - 'class': 'rotateBox'}), dict(name='div', attrs={'id': 'contentSwap'}) - ] - - extra_css = ''' - h1{font-family :Arial,Helvetica,sans-serif; font-size:large;} - h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;} - h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;} - p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;} - .bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;} - .subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;} - .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;} - .byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;} - .updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;} - .galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;} - .galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;} - ''' - - feeds = [ - - (u'News', u'http://www.torontosun.com/news/rss.xml'), - (u'Canada', u'http://www.torontosun.com/news/canada/rss.xml'), - (u'Columnists', u'http://www.torontosun.com/news/columnists/rss.xml'), - (u'World', u'http://www.torontosun.com/news/world/rss.xml'), - (u'Money', u'http://www.torontosun.com/money/rss.xml') - ] - - def preprocess_html(self, soup): - # To fetch images from the specified source - for img in soup.findAll('img', src=True): - url = img.get('src').split('?')[-1].partition('=')[-1] - if url: - img['src'] = url.split('&')[0].partition('=')[0] - img['width'] = url.split( - '&')[-1].partition('=')[-1].split('x')[0] - img['height'] = url.split( - '&')[-1].partition('=')[-1].split('x')[1] - return soup diff --git a/recipes/toyokeizai.recipe b/recipes/toyokeizai.recipe deleted file mode 100644 index e6a5c30ced..0000000000 --- a/recipes/toyokeizai.recipe +++ /dev/null @@ -1,67 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Hiroshi Miura ' -''' -www.toyokeizai.net -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Toyokeizai(BasicNewsRecipe): - title = u'ToyoKeizai News' - __author__ = 'Hiroshi Miura' - oldest_article = 1 - max_articles_per_feed = 50 - description = 'Japanese traditional economy and business magazine, only for advanced subscribers supported' - publisher = 'Toyokeizai Shinbun Sha' - category = 'economy, magazine, japan' - language = 'ja' - encoding = 'euc-jp' - index = 'http://member.toyokeizai.net/news/' - remove_javascript = True - no_stylesheets = True - masthead_title = u'TOYOKEIZAI' - needs_subscription = True - timefmt = '[%y/%m/%d]' - recursions = 5 - match_regexps = [r'page/\d+'] - - keep_only_tags = [ - dict(name='div', attrs={'class': ['news']}), - dict(name='div', attrs={'class': ["news_cont"]}), - dict(name='div', attrs={'class': ["news_con"]}), - # dict(name='div', attrs={'class':["norightsMessage"]}) - ] - remove_tags = [{'class': "mt35 mgz"}, - {'class': "mt20 newzia"}, - {'class': "mt20 fontS"}, - {'class': "bk_btn_m"}, - dict(id='newzia_connect_member') - ] - - def parse_index(self): - feeds = [] - soup = self.index_to_soup(self.index) - topstories = soup.find('ul', attrs={'class': 'list6'}) - if topstories: - newsarticles = [] - for itt in topstories.findAll('li'): - itema = itt.find('a', href=True) - itemd = itt.find('span') - newsarticles.append({ - 'title': itema.string, 'date': re.compile(r"\- ").sub("", itemd.string), 'url': 'http://member.toyokeizai.net' + itema['href'], 'description': itema['title'] # noqa - }) - feeds.append(('news', newsarticles)) - return feeds - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - br.open('http://member.toyokeizai.net/norights/form/') - br.select_form(nr=0) - br['kaiin_id'] = self.username - br['password'] = self.password - br.submit() - return br diff --git a/recipes/tpm_uk.recipe b/recipes/tpm_uk.recipe deleted file mode 100644 index 963855cca3..0000000000 --- a/recipes/tpm_uk.recipe +++ /dev/null @@ -1,41 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010-2015, Darko Miletic ' -''' -www.philosophersmag.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TPM_uk(BasicNewsRecipe): - title = "The Philosophers' Magazine" - __author__ = 'Darko Miletic' - description = 'Title says it all' - publisher = "The Philosophers' Magazine" - category = 'philosophy, news' - oldest_article = 80 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en_GB' - remove_empty_feeds = True - publication_type = 'magazine' - extra_css = """ - body{font-family: Raleway,sans-serif } - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object', 'img'])] - keep_only_tags = [ - dict(attrs={'class': ['article-title', 'article-content']})] - - feeds = [ - - (u'Articles', u'http://www.philosophersmag.com/index.php/tpm-mag-articles?format=feed&type=rss'), - (u'Reflections', u'http://www.philosophersmag.com/index.php/reflections?format=feed&type=rss') - ] diff --git a/recipes/tri_city_herald.recipe b/recipes/tri_city_herald.recipe deleted file mode 100644 index a1cda789a6..0000000000 --- a/recipes/tri_city_herald.recipe +++ /dev/null @@ -1,27 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class TriCityHeraldRecipe(BasicNewsRecipe): - title = u'Tri-City Herald' - description = 'The Tri-City Herald Mid-Columbia.' - language = 'en' - __author__ = 'Laura Gjovaag' - oldest_article = 1.5 - max_articles_per_feed = 100 - no_stylesheets = True - remove_javascript = True - keep_only_tags = [ - dict(name='div', attrs={'id': 'story_header'}), - dict(name='img', attrs={'class': 'imageCycle'}), - dict(name='div', attrs={'id': ['cycleImageCaption', 'story_body']}) - ] - remove_tags = [ - dict(name='div', attrs={'id': 'story_mlt'}), - dict(name='a', attrs={'id': 'commentCount'}), - dict(name=['script', 'noscript', 'style'])] - extra_css = 'h1{font: bold 140%;} #cycleImageCaption{font: monospace 60%}' - - feeds = [ - (u'Tri-City Herald Mid-Columbia', - u'http://www.tri-cityherald.com/901/index.rss') - ] diff --git a/recipes/trojmiasto_pl.recipe b/recipes/trojmiasto_pl.recipe deleted file mode 100644 index 73d679aaf9..0000000000 --- a/recipes/trojmiasto_pl.recipe +++ /dev/null @@ -1,57 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Trojmiasto(BasicNewsRecipe): - title = u'Tr\xf3jmiasto.pl' - __author__ = 'fenuks' - description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl' # noqa - category = '' - language = 'pl' - encoding = 'utf-8' - extra_css = 'ul {list-style: none; padding:0; margin:0;}' - cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif' - use_embedded_content = False - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - remove_javascript = True - remove_attributes = ['style', 'font'] - ignore_duplicate_articles = {'title', 'url'} - - preprocess_regexps = [(re.compile(u'Czytaj więcej.*?', re.DOTALL | re.IGNORECASE), lambda match: ''), (re.compile(u'Zobacz też.*?', re.DOTALL | re.IGNORECASE), lambda match: ''), # noqa - (re.compile(u'[A-ZĄĆĘŁŃÓŚŹŻ ,.:-]*?', re.DOTALL), lambda match: ''), ] - - remove_tags = [ - dict(id=['logo', 'font_small', 'font_big']), - dict(attrs={'class': ['title-long', 'ankieta', 'newsletter-inside-content newsletter-wrap', 'copyright_box', 'logo', 'btn btn-photo-add', 'related-info-wrap', 'nTabs', 'article-list', 'rate-player horizontal', 'type-box', 'rate-player', 'hover-nav', 'live-head tC', 'prev-link', 'next-link', 'ie6']}), # noqa - dict(attrs={'title': [u'drukuj artykuł', u'podziel się na Facebooku', u'prześlij artykuł']})] - remove_tags_after = dict(attrs={'class': 'author-wrap'}) - remove_tags_before = dict(attrs={'class': 'text-container'}) - - feeds = [ - (u'Wszystkie', u'http://rss.trojmiasto.pl/rss,0.xml'), - (u'Fakty i opinie', u'http://rss.trojmiasto.pl/rss,1.xml'), - (u'Sport', u'http://rss.trojmiasto.pl/rss,2.xml'), - (u'Dom', u'http://rss.trojmiasto.pl/rss,3.xml'), - (u'Moto', u'http://rss.trojmiasto.pl/rss,4.xml'), - (u'Nauka', u'http://rss.trojmiasto.pl/rss,5.xml'), - (u'Rozrywka', u'http://rss.trojmiasto.pl/rss,6.xml'), - (u'Kultura', u'http://rss.trojmiasto.pl/rss,7.xml'), - (u'Rowery', u'http://rss.trojmiasto.pl/rss,8.xml'), - (u'Dziecko', u'http://rss.trojmiasto.pl/rss,9.xml'), - (u'Zdrowie i uroda', u'http://rss.trojmiasto.pl/rss,10.xml'), - (u'Praca', u'http://rss.trojmiasto.pl/rss,11.xml'), - (u'Artyku\u0142y czytelnik\xf3w', u'http://rss.trojmiasto.pl/rss,12.xml'), - (u'Korki', u'http://rss.trojmiasto.pl/rss,13.xml'), - (u'Historia', u'http://rss.trojmiasto.pl/rss,14.xml'), - (u'Biznes', u'http://rss.trojmiasto.pl/rss,16.xml'), - (u'Kryminalne Tr\xf3jmiasto', u'http://rss.trojmiasto.pl/rss,17.xml'), - (u'Przewodnik', u'http://rss.trojmiasto.pl/rss,18.xml'), - (u'Aktywne Tr\xf3jmiasto', u'http://rss.trojmiasto.pl/rss,19.xml'), - (u'Delux', u'http://rss.trojmiasto.pl/rss,20.xml')] - - def print_version(self, url): - return url + '?print=1' diff --git a/recipes/trombon.recipe b/recipes/trombon.recipe deleted file mode 100644 index bad8de80ca..0000000000 --- a/recipes/trombon.recipe +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -trombon.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Trombon(BasicNewsRecipe): - title = u'Trombon' - __author__ = u'Silviu Cotoar\u0103' - description = u'Parodii si Pamflete' - publisher = u'Trombon' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Fun' - encoding = 'utf-8' - cover_url = 'http://www.trombon.ro/i/trombon.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'articol'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['info_2']}), dict( - name='iframe', attrs={'scrolling': ['no']}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'id': 'article_vote'}) - ] - - feeds = [ - (u'Feeds', u'http://feeds.feedburner.com/trombon/ABWb?format=xml') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/trystero.recipe b/recipes/trystero.recipe deleted file mode 100644 index bb1f3af19a..0000000000 --- a/recipes/trystero.recipe +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = u'2013, Tomasz Dlugosz ' - -''' -trystero.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class trystero(BasicNewsRecipe): - title = 'Trystero' - __author__ = u'Tomasz D\u0142ugosz' - language = 'pl' - description = u'Trystero.pl jest niezależnym blogiem finansowym. Publikowane na nim teksty dotyczą rynku kapitałowego, ekonomii, gospodarki i życia społecznego – w takiej mniej więcej kolejności.' # noqa - oldest_article = 7 - remove_javascript = True - no_stylesheets = True - - feeds = [(u'Newsy', u'http://www.trystero.pl/feed')] - - keep_only_tags = [ - dict(name='h1'), - dict(name='div', attrs={'class': ['post-content']})] diff --git a/recipes/tsn.recipe b/recipes/tsn.recipe deleted file mode 100644 index d99a99ade3..0000000000 --- a/recipes/tsn.recipe +++ /dev/null @@ -1,22 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1289990851(BasicNewsRecipe): - title = u'TSN' - oldest_article = 7 - max_articles_per_feed = 50 - language = 'en_CA' - __author__ = 'Nexus' - no_stylesheets = True - auto_cleanup = True - use_embedded_content = False - INDEX = 'http://tsn.ca/nhl/story/?id=nhl' - # keep_only_tags = [dict(name='div', attrs={'id':['tsnColWrap']}), - # dict(name='div', attrs={'id':['tsnStory']})] - # remove_tags = [dict(name='div', attrs={'id':'tsnRelated'}), - # dict(name='div', attrs={'class':'textSize'})] - - feeds = [ - ('News', - 'http://www.tsn.ca/datafiles/rss/Stories.xml'), - ] diff --git a/recipes/tuttojove.recipe b/recipes/tuttojove.recipe deleted file mode 100644 index 4b483eca8a..0000000000 --- a/recipes/tuttojove.recipe +++ /dev/null @@ -1,26 +0,0 @@ -__license__ = 'GPL v3' -__author__ = 'faber1971' -description = 'Italian website on Juventus F.C. - v1.00 (17, December 2011)' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1305984536(BasicNewsRecipe): - title = u'tuttojuve' - description = 'Juventus' - language = 'it' - __author__ = 'faber1971' - oldest_article = 1 - max_articles_per_feed = 100 - - feeds = [ - (u'notizie', u'http://feeds.tuttojuve.com/rss/'), - (u'da vinovo', u'http://feeds.tuttojuve.com/rss/?c=10'), - (u'primo piano', u'http://feeds.tuttojuve.com/rss/?c=16'), - (u'editoriale', u'http://feeds.tuttojuve.com/rss/?c=3'), - (u'il punto', u'http://feeds.tuttojuve.com/rss/?c=8'), - (u'pagelle', u'http://feeds.tuttojuve.com/rss/?c=9'), - (u'avversario', u'http://feeds.tuttojuve.com/rss/?c=11')] - - def print_version(self, url): - return self.browser.open_novisit(url).geturl() diff --git a/recipes/tuttosport.recipe b/recipes/tuttosport.recipe deleted file mode 100644 index 805f3355b8..0000000000 --- a/recipes/tuttosport.recipe +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini' -__copyright__ = '2009, Lorenzo Vigentini ' -__version__ = 'v1.01' -__date__ = '30, January 2010' -__description__ = 'Sport daily news from Italy' - -'''www.tuttosport.com''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class tuttosport(BasicNewsRecipe): - author = 'Lorenzo Vigentini' - description = 'Sport daily news from Italy' - - cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png' - title = 'Tuttosport' - publisher = 'Nuova Editoriale Sportiva S.r.l' - category = 'Sport News' - - language = 'it' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 2 - max_articles_per_feed = 20 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - - def print_version(self, url): - segments = url.split('/') - printURL = '/'.join(segments[0:10]) + '?print' - return printURL - - keep_only_tags = [ - dict(name='h2', attrs={'class': 'tit_Article'}), - dict(name='div', attrs={ - 'class': ['box_Img img_L ', 'txt_ArticleAbstract', 'txt_Article txtBox_cms']}) - ] - - feeds = [ - (u'Primo piano', u'http://www.tuttosport.com/rss/primo_piano.xml'), - (u'Cronanca', u'http://www.tuttosport.com/rss/Cronaca-205.xml'), - (u'Lettere al direttore', - u'http://blog.tuttosport.com/direttore/feed'), - (u'Calcio', u'http://www.tuttosport.com/rss/Calcio-3.xml'), - (u'Speciale Derby', - u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'), - (u'Formula 1', u'hhttp://www.tuttosport.com/rss/Formula-1-7.xml'), - (u'Moto', u'hhttp://www.tuttosport.com/rss/Moto-8.xml'), - (u'Basket', u'http://www.tuttosport.com/rss/Basket-9.xml'), - (u'Altri Sport', u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'), - (u'Tuttosport League', - u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'), - (u'Scommesse', u'http://www.tuttosport.com/rss/Scommesse-286.xml') - ] - - extra_css = ''' - body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;} - h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;} - .txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;} - .txt_Article {clear: both; margin: 8px 8px 12px;} - .txt_Author {float: right;} - .txt_ArticleAuthor {clear: both; margin: 8px;} - ''' diff --git a/recipes/tveast_dk.recipe b/recipes/tveast_dk.recipe deleted file mode 100644 index 255974b91e..0000000000 --- a/recipes/tveast_dk.recipe +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -TV ØST -''' - - -class Tveast_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'TV ØST' - description = ('Ved at abonnere på vores RSS-feed kan du få de seneste ' - 'regionale nyheder til at automatisk dukke op på din skærm.') - category = 'news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - # Feed are found here: http://www.tveast.dk/tveast/rss - feeds = [ - ('TV ØST', 'http://www.tveast.dk/tveast/rss'), - ] diff --git a/recipes/tvn24.recipe b/recipes/tvn24.recipe deleted file mode 100644 index 789dda0eda..0000000000 --- a/recipes/tvn24.recipe +++ /dev/null @@ -1,42 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class tvn24(BasicNewsRecipe): - title = u'TVN24' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'fenuks, Artur Stachecki' - description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata' - category = 'news' - language = 'pl' - cover_url = 'http://ncplus.pl/~/media/n/npl/kanaly/logo%20na%20strony%20kanalow/tvn24-630.png' # noqa - extra_css = 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}' - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - keep_only_tags = [ - dict(name='article',attrs={'class':'mb20'}) - ] - remove_tags = [ - dict(attrs={'class': ['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}), # noqa - dict(name='aside'), - dict(name='figure'), - dict(name='section', attrs={ - 'id': ['forum', 'innerArticle', 'quiz toCenter', 'mb20']}) - ] - remove_tags_after = [dict(name='li', attrs={'class': 'share'})] - feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ] - - def get_article_url(self, article): - link = article.get('link') - # following websites are linked in aforementioned feeds, but have different layout not compatible with this recipe - banned = ['tvnwarszawa.pl','tvnmeteo.pl','szklokontaktowe.tvn24.pl','tvn24bis.pl'] - if not any(x in link for x in banned): - return link - - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup diff --git a/recipes/tvxs.recipe b/recipes/tvxs.recipe deleted file mode 100644 index eeb5d53db8..0000000000 --- a/recipes/tvxs.recipe +++ /dev/null @@ -1,72 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -import re - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class TVXS(BasicNewsRecipe): - title = 'TVXS' - __author__ = 'hargikas' - description = 'News from Greece' - max_articles_per_feed = 100 - oldest_article = 3 - publisher = 'TVXS' - category = 'news, sport, greece' - language = 'el' - encoding = None - use_embedded_content = False - remove_empty_feeds = True - conversion_options = {'smarten_punctuation': True} - no_stylesheets = True - publication_type = 'newspaper' - remove_tags_before = dict(name='h1', attrs={'class': 'print-title'}) - remove_tags_after = dict(name='div', attrs={ - 'class': 'field field-type-relevant-content field-field-relevant-articles'}) - remove_tags = [dict(name='div', attrs={'class': 'field field-type-relevant-content field-field-relevant-articles'}), - dict(name='div', attrs={ - 'class': 'field field-type-filefield field-field-image-gallery'}), - dict(name='div', attrs={'class': 'filefield-file'})] - remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', - 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height'] - extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \ - table { width: 100%; } \ - td img { display: block; margin: 5px auto; } \ - ul { padding-top: 10px; } \ - ol { padding-top: 10px; } \ - li { padding-top: 5px; padding-bottom: 5px; } \ - h1 { text-align: center; font-size: 125%; font-weight: bold; } \ - h2, h3, h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }' - preprocess_regexps = [(re.compile(r'', re.IGNORECASE), lambda m: ''), - (re.compile(r'', re.IGNORECASE), lambda m: '')] - - feeds = [(u'Ελλάδα', 'http://tvxs.gr/feeds/2/feed.xml'), - (u'Κόσμος', 'http://tvxs.gr/feeds/5/feed.xml'), - (u'Τοπικά Νέα', 'http://tvxs.gr/feeds/5363/feed.xml'), - (u'Sci Tech', 'http://tvxs.gr/feeds/26/feed.xml'), - (u'Αθλητικά', 'http://tvxs.gr/feeds/243/feed.xml'), - (u'Internet & ΜΜΕ', 'http://tvxs.gr/feeds/32/feed.xml'), - (u'Καλά Νέα', 'http://tvxs.gr/feeds/914/feed.xml'), - (u'Απόψεις', 'http://tvxs.gr/feeds/1109/feed.xml'), - (u'Πολιτισμός', 'http://tvxs.gr/feeds/1317/feed.xml'), - (u'Greenlife', 'http://tvxs.gr/feeds/3/feed.xml'), - (u'Ιστορία', 'http://tvxs.gr/feeds/1573/feed.xml'), - (u'Χιούμορ', 'http://tvxs.gr/feeds/692/feed.xml')] - - def print_version(self, url): - br = self.get_browser() - response = br.open(url) - data = response.read() - - pos_1 = data.find('', pos_1) - if pos_2 == -1: - return url - - pos_1 += len('