'
-__license__ = 'GPL v3'
-__version__ = '1.2'
-
-'''
-http://www.thecodelesscode.com/
-'''
-
-from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class CodelessCode(BasicNewsRecipe):
- __author__ = 'April King'
- title = u'The Codeless Code'
- category = 'fiction, programming, technology'
- chapters = {} # ie, Mousetrap -> 182
- compress_news_images = True
- compress_news_images_max_size = 100
- cover_url = 'http://www.thecodelesscode.com/pages/case-9/Lotus-050.jpg'
- credits = [u'{0}
'.format(title),
- u'By Qi
',
- u'An illustrated collection of (sometimes violent) fables concerning the Art and Philosophy of software development, written in the spirit of Zen kōans
', # noqa
- u'eBook conversion courtesy of {0}
'.format(__author__)]
- description = u'The Art and Philosophy of software development, written in the spirit of Zen kōans'
- extra_css = '.article_date { display: none; float: right; } \
- .chapter_title { font-size: 1.75em; margin-top: 0; } \
- .chapter_title::first-letter { font-size: 1.35em; font-weight: 500; letter-spacing: -.05em; } \
- h2 { margin-top: 0; } \
- .image_wrapper { text-align: center; }'
- index = 'http://www.thecodelesscode.com/contents'
- language = 'en'
- max_articles_per_feed = 1000 # I can only wish
- path_remappings = {} # IE, /case/182 -> articles_72/index.html
- publication_type = 'blog'
- publisher = 'Qi'
- resolve_internal_links = True
- scale_news_images = (600, 400)
- simultaneous_downloads = 1
- url = 'http://www.thecodelesscode.com'
-
- def parse_index(self):
- koans = []
-
- # Retrieve the contents page, containing the ToC
- soup = self.index_to_soup(self.index)
-
- for koan in soup.findAll('tr'):
- # BS has some trouble with the weird layout
- tag = koan.find('a')
-
- if tag is None:
- continue
- if 'random' in tag['href']:
- continue
-
- # Minor coding error causes calibre to glitch; use the current date
- # for the most recent title
- koan_date = koan.find('td', attrs={'class': 'toc-date'})
- if koan_date is None:
- koan_date = date.isoformat(date.today())
- else:
- koan_date = koan_date.string
-
- title = tag.string
- url = self.url + tag['href']
-
- if u'The Applicant' in title:
- continue # Only the main story
-
- koans.append({
- 'content': '',
- 'date': koan_date,
- 'description': '',
- 'title': title,
- 'url': url,
- })
-
- # ie, Mousetrap -> 182
- self.chapters[title] = url.split('/')[-1]
-
- # Oldest koans first
- koans.reverse()
-
- # Log and then get out of here
- self.log("Found {0} koans".format(len(koans)))
- return([(self.title, koans)])
-
- def preprocess_html(self, soup):
- title = soup.find('h1', attrs={'class': 'title'}).find(
- 'a', attrs={'class': 'subtle'}).string
-
- # Add a title at the beginning of each chapter
- if title in self.chapters:
- title = '{0}
'.format(title)
-
- # Load up the actual story
- koan = soup.find('div', attrs={'class': 'story koan'})
-
- # Kind of a hack-y way to get .children in BS3
- # ->
- contents = list(koan.contents)
- koan = bs(title)
-
- for i in reversed(contents):
- koan.insert(1, i)
-
- # Remove all anchors that don't contain /case/, leaving them as just their text
- # Note that we'll come back and clean up /case/ links when the URLs are remapped
- # during postprocess_book()
- anchors = koan.findAll('a')
- if anchors != []:
- for anchor in anchors:
- if '/case/' in anchor['href']:
- pass
- elif 'note' in anchor['href']:
- anchor.replaceWith('')
- else:
- # Again, a hacky way to get the contents of the tag, thanks
- # to BS3
- contents = list(anchor.contents)
- linktext = bs()
- for i in reversed(contents):
- linktext.insert(1, i)
- anchor.replaceWith(linktext)
-
- # Find all the images, and wrap them up in an image_wrapper div
- for i in range(0, len(koan.contents), 1):
- if not hasattr(koan.contents[i], 'name'):
- continue # skip carriage returns
- if koan.contents[i].name == u'img':
- div = bs('')
- div.div.insert(0, koan.contents[i])
- koan.insert(i, div)
-
- return(koan)
-
- def canonicalize_internal_url(self, url, is_link=True):
- url = url.split(self.url)[-1]
- return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link)
-
- def postprocess_book(self, oeb, opts, log):
- # Go through each internal representation of each HTML file, and fix
- # all the broken hrefs, if possible
- for item in oeb.manifest.items:
- if item.media_type == 'text/html':
-
- for node in item.data.xpath('//*[@href]'):
- naughty_href = node.get('href')
-
- if naughty_href in self.path_remappings:
- node.set('href', '../' +
- self.path_remappings[naughty_href])
- href = node.get('href')
- self.log(
- "Remapped href {0} --> {1}".format(naughty_href, href))
-
- # Remove the superfluous extra feed page at the beginning of the book, replacing it
- # with the proper credits
- for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="ul"]'):
- item.getparent().remove(item)
-
- for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="p"]'):
- item.getparent().remove(item)
-
- for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="div"]'):
- for credit in self.credits[::-1]:
- item.insert(0, etree.fromstring(credit, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)))
-
- # Change the creator from "calibre" to the actual author
- # Also, we don't need the date in the ebook's title
- oeb.metadata.items['creator'][0].value = self.publisher
- oeb.metadata.items['description'][0].value = oeb.metadata.items[
- 'description'][0].value.split('\n\nArticles in this issue')[0]
- oeb.metadata.items['publication_type'][0].value = self.title
- oeb.metadata.items['publisher'][0].value = self.publisher
- oeb.metadata.items['title'][0].value = self.title
diff --git a/recipes/thedgesingapore.recipe b/recipes/thedgesingapore.recipe
deleted file mode 100644
index be20a26ba6..0000000000
--- a/recipes/thedgesingapore.recipe
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
-'''
-www.livemint.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Edgesingapore(BasicNewsRecipe):
- title = 'The Edge Singapore'
- __author__ = 'Darko Miletic'
- description = 'Financial news from Singapore'
- publisher = 'The Edge Singapore'
- category = 'news, finances, singapore'
- language = 'en'
-
- lang = 'en_SG'
- oldest_article = 15
- max_articles_per_feed = 100
- no_stylesheets = True
- encoding = 'utf-8'
- use_embedded_content = False
- extra_css = ' .contentheading{font-size: x-large} .small{font-size: small} .createdate{font-size: small; font-weight: bold} '
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'author': publisher, 'language': lang, 'pretty_print': True, 'linearize_tables': True
- }
-
- remove_tags = [
- dict(name=['object', 'link', 'embed', 'form', 'iframe']), dict(name='div', attrs={
- 'id': 'toolbar-article'}), dict(name='div', attrs={'class': 'backtotop'}), dict(name='img', attrs={'alt': 'Print'})
- ]
-
- remove_tags_after = dict(name='div', attrs={'class': 'backtotop'})
-
- feeds = [(u'Articles', u'http://feeds.feedburner.com/edgesg')]
-
- def print_version(self, url):
- return url + '?tmpl=component&print=1'
-
- def preprocess_html(self, soup):
- attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' # noqa
- ]
- for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
- item.name = 'div'
- for attrib in attribs:
- item[attrib] = ''
- del item[attrib]
- return self.adeify_images(soup)
diff --git a/recipes/theluminouslandscape.recipe b/recipes/theluminouslandscape.recipe
deleted file mode 100644
index b796b3052a..0000000000
--- a/recipes/theluminouslandscape.recipe
+++ /dev/null
@@ -1,34 +0,0 @@
-
-__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic '
-'''
-luminous-landscape.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class theluminouslandscape(BasicNewsRecipe):
- title = 'The Luminous Landscape'
- __author__ = 'Darko Miletic'
- description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.' # noqa
- publisher = 'The Luminous Landscape '
- category = 'news, blog, photograph, international'
- oldest_article = 15
- max_articles_per_feed = 100
- no_stylesheets = True
- remove_empty_feeds = True
- use_embedded_content = True
- encoding = 'cp1252'
- language = 'en'
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- feeds = [
- (u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]
- remove_tags = [dict(name=['object', 'link', 'iframe'])]
-
- def preprocess_html(self, soup):
- return self.adeify_images(soup)
diff --git a/recipes/themarketticker.recipe b/recipes/themarketticker.recipe
deleted file mode 100644
index 4a5fd1196e..0000000000
--- a/recipes/themarketticker.recipe
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic '
-'''
-market-ticker.denninger.net
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Themarketticker(BasicNewsRecipe):
- title = 'The Market Ticker'
- __author__ = 'Darko Miletic'
- description = 'Commentary On The Capital Markets'
- oldest_article = 7
- max_articles_per_feed = 100
- language = 'en'
-
- no_stylesheets = True
- use_embedded_content = True
- html2lrf_options = ['--comment', description, '--category', 'blog,news,finances', '--base-font-size', '10'
- ]
- feeds = [(u'Posts', u'http://market-ticker.denninger.net/feeds/index.rss2')]
diff --git a/recipes/themorningpaper.recipe b/recipes/themorningpaper.recipe
deleted file mode 100644
index 0cf4263873..0000000000
--- a/recipes/themorningpaper.recipe
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-# -*- mode: python -*-
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL v3'
-__copyright__ = '2017, Darko Miletic '
-'''
-blog.acolyer.org
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Themorningpaper(BasicNewsRecipe):
- title = 'The Morning Paper'
- __author__ = 'Darko Miletic'
- description = ('an interesting/influential/important paper from'
- ' the world of CS every weekday morning, as selected by Adrian Colyer')
- publisher = 'Adrian Colyer'
- category = 'news, tech'
- oldest_article = 180
- max_articles_per_feed = 200
- no_stylesheets = True
- encoding = 'utf-8'
- use_embedded_content = False
- language = 'en'
- remove_empty_feeds = True
- auto_cleanup = True
- publication_type = 'blog'
- extra_css = """
- body{font-family: Georgia,Palatino,serif }
- img{margin-bottom: 0.4em; display:block}
- """
-
- conversion_options = {
- 'comment': description,
- 'tags': category,
- 'publisher': publisher,
- 'language': language
- }
-
- feeds = [(u'Articles', u'https://blog.acolyer.org/feed/')]
diff --git a/recipes/thenews.recipe b/recipes/thenews.recipe
deleted file mode 100644
index 6d71543cac..0000000000
--- a/recipes/thenews.recipe
+++ /dev/null
@@ -1,88 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TheNewsRecipe(BasicNewsRecipe):
- __license__ = 'GPL v3'
- __author__ = 'kwetal'
- language = 'en_PK'
- version = 1
-
- title = u'The News'
- publisher = u'Jang Group'
- category = u'News, Pakistan'
- description = u'English Newspaper from Pakistan'
-
- use_embedded_content = False
- remove_empty_feeds = True
- oldest_article = 2
- max_articles_per_feed = 100
-
- no_stylesheets = True
- remove_javascript = True
- encoding = 'iso-8859-1'
-
- remove_tags = []
- remove_tags.append(dict(name='img', attrs={'src': 'images/thenews.gif'}))
- remove_tags.append(dict(name='img', attrs={'src': 'images/shim.gif'}))
-
- # Feeds from http://thenews.com.pk/rss.asp
- feeds = []
- feeds.append(
- (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'))
- feeds.append(
- (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'))
- feeds.append(
- (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'))
- feeds.append(
- (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'))
- feeds.append(
- (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'))
- feeds.append(
- (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'))
- feeds.append(
- (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'))
- feeds.append(
- (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'))
- feeds.append(
- (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'))
- feeds.append(
- (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'))
- feeds.append(
- (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'))
- feeds.append(
- (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'))
- feeds.append(
- (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'))
-
- conversion_options = {'comments': description, 'tags': category, 'language': 'en',
- 'publisher': publisher, 'linearize_tables': True}
-
- extra_css = '''
- body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
- .heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
- .small_txt {text-align: left;}
- .dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
- '''
-
- def print_version(self, url):
- ignore, sep, main = url.rpartition('/')
-
- if main.startswith('updates.asp'):
- return url.replace('updates.asp', 'print.asp')
- elif main.startswith('top_story_detail.asp'):
- return url.replace('top_story_detail.asp', 'print3.asp')
- elif main.startswith('daily_detail.asp'):
- return url.replace('daily_detail.asp', 'print1.asp')
- else:
- return None
-
- def preprocess_html(self, soup):
- for tr in soup.findAll('tr', attrs={'bgcolor': True}):
- del tr['bgcolor']
-
- td = soup.find('td', attrs={'class': 'small_txt', 'height': '20'})
- if td:
- del td['height']
- td['class'] = 'dateline'
-
- return soup
diff --git a/recipes/theoldfoodie.recipe b/recipes/theoldfoodie.recipe
deleted file mode 100644
index 744ab4d1d3..0000000000
--- a/recipes/theoldfoodie.recipe
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
-'''
-www.theoldfoodie.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TheOldFoodie(BasicNewsRecipe):
- title = 'The Old Foodie'
- __author__ = 'Darko Miletic'
- description = 'Food blog'
- category = 'cuisine, food, blog'
- oldest_article = 30
- max_articles_per_feed = 100
- use_embedded_content = True
- no_stylesheets = True
- encoding = 'utf-8'
- language = 'en'
-
- conversion_options = {
- 'comments': description, 'tags': category, 'language': 'en'
- }
-
- feeds = [
- (u'Articles', u'http://www.theoldfoodie.com/feeds/posts/default?alt=rss')]
diff --git a/recipes/theonion.recipe b/recipes/theonion.recipe
deleted file mode 100644
index af97c0169a..0000000000
--- a/recipes/theonion.recipe
+++ /dev/null
@@ -1,89 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2009-2013, Darko Miletic '
-
-'''
-theonion.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TheOnion(BasicNewsRecipe):
- title = 'The Onion'
- __author__ = 'Darko Miletic'
- description = "The Onion, America's Finest News Source, is an award-winning publication covering world, national, and * local issues. It is updated daily online and distributed weekly in select American cities." # noqa
- oldest_article = 2
- max_articles_per_feed = 100
- publisher = 'Onion, Inc.'
- category = 'humor, news, USA'
- language = 'en'
- no_stylesheets = True
- use_embedded_content = False
- encoding = 'utf-8'
- publication_type = 'newsportal'
- needs_subscription = 'optional'
- masthead_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
- cover_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
- extra_css = """
- body{font-family: Helvetica,Arial,sans-serif}
- .section_title{color: gray; text-transform: uppercase}
- .title{font-family: Georgia,serif}
- .meta{color: gray; display: inline}
- .has_caption{display: block}
- .caption{font-size: x-small; color: gray; margin-bottom: 0.8em}
- """
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- keep_only_tags = [
- dict(attrs={'class': lambda x: x and 'content-wrapper' in x.split()})]
- remove_attributes = ['lang', 'rel']
- remove_tags = [
- dict(name=['object', 'link', 'iframe', 'base', 'meta', 'button', 'footer', 'blockquote', 'figcaption']), dict(attrs={'class': lambda x: x and 'share-tools' in x.split()}), dict(attrs={'class': lambda x: x and 'content-meta' in x.split()}), dict(attrs={'class': 'below-article-tools'}), dict(name='div', attrs={'id': ['topshare', 'bottomshare']}) # noqa
- ]
-
- feeds = [
- (u'Daily', u'http://feeds.theonion.com/theonion/daily'), (u'Sports',
- u'http://feeds.theonion.com/theonion/sports')
- ]
-
- def get_browser(self):
- br = BasicNewsRecipe.get_browser(self)
- br.open('http://www.theonion.com/')
- if self.username is not None and self.password is not None:
- br.open('https://ui.ppjol.com/login/onion/u/j_spring_security_check')
- br.select_form(name='f')
- br['j_username'] = self.username
- br['j_password'] = self.password
- br.submit()
- return br
-
- def get_article_url(self, article):
- artl = BasicNewsRecipe.get_article_url(self, article)
- if artl.startswith('http://www.theonion.com/audio/'):
- artl = None
- return artl
-
- def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- for item in soup.findAll('a'):
- limg = item.find('img')
- if item.string is not None:
- str = item.string
- item.replaceWith(str)
- else:
- if limg:
- item.name = 'div'
- item.attrs = []
- if not limg.get('alt'):
- limg['alt'] = 'image'
- else:
- str = self.tag_to_string(item)
- item.replaceWith(str)
- for item in soup.findAll('img'):
- if item.get('data-src'):
- item['src'] = item['data-src']
- return soup
diff --git a/recipes/thewest_au.recipe b/recipes/thewest_au.recipe
deleted file mode 100644
index a035a2d6b6..0000000000
--- a/recipes/thewest_au.recipe
+++ /dev/null
@@ -1,62 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic '
-'''
-thewest.com.au
-'''
-
-import re
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TheWest(BasicNewsRecipe):
- title = 'The West Australian'
- __author__ = 'Darko Miletic'
- description = 'News from Australia'
- publisher = 'thewest.com.au'
- category = 'news, politics, Australia'
- oldest_article = 2
- max_articles_per_feed = 200
- no_stylesheets = True
- encoding = 'utf8'
- use_embedded_content = False
- language = 'en_AU'
- remove_empty_feeds = True
- publication_type = 'newspaper'
- masthead_url = 'http://l.yimg.com/ao/i/mp/properties/news/02/wan/img/wan-logo-h49.png'
- extra_css = ' .article{font-family: Arial,Helvetica,sans-serif } .image{font-size: x-small} '
-
- preprocess_regexps = [
- (re.compile(r'.*?', re.DOTALL |
- re.IGNORECASE), lambda match: '')
- ]
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- remove_tags = [
- dict(attrs={'class': ['tools', 'lhs']}), dict(attrs={
- 'id': 'tools-bottom'}), dict(attrs={'href': 'http://twitter.com/thewest_com_au'})
- ]
- keep_only_tags = [dict(attrs={'class': 'mod article'})]
- remove_attributes = ['width', 'height']
-
- feeds = [
-
- (u'WA News', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/wa.xml'),
- (u'National', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/national.xml'),
- (u'World', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/world.xml'),
- (u'Offbeat', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/offbeat.xml'),
- (u'Business', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/business.xml'),
- (u'Sport', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/sport.xml'),
- (u'Entertainment', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/entertainment.xml'),
- (u'Travel', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/travel.xml'),
- (u'Life+Style', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/lifestyle.xml')
- ]
-
- def get_article_url(self, article):
- return article.get('guid', None)
-
- def preprocess_html(self, soup):
- return self.adeify_images(soup)
diff --git a/recipes/think_progress.recipe b/recipes/think_progress.recipe
deleted file mode 100644
index 8d1a4b7924..0000000000
--- a/recipes/think_progress.recipe
+++ /dev/null
@@ -1,13 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class AdvancedUserRecipe1263409732(BasicNewsRecipe):
- title = u'Think Progress'
- description = u'A compilation of progressive articles on social and economic justice, healthy communities, media accountability, global and domestic security.' # noqa
- __author__ = u'Xanthan Gum'
- language = 'en'
-
- oldest_article = 7
- max_articles_per_feed = 100
-
- feeds = [(u'News Articles', u'http://thinkprogress.org/feed/')]
diff --git a/recipes/thn.recipe b/recipes/thn.recipe
deleted file mode 100644
index efd0e183f9..0000000000
--- a/recipes/thn.recipe
+++ /dev/null
@@ -1,19 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class AdvancedUserRecipe1289990851(BasicNewsRecipe):
- title = u'The Hockey News'
- language = 'en_CA'
- __author__ = 'Nexus'
- oldest_article = 7
- max_articles_per_feed = 25
- no_stylesheets = True
- remove_tags = [dict(name='div', attrs={'class': 'article_info'}),
- dict(name='div', attrs={'class': 'photo_details'}),
- dict(name='div', attrs={'class': 'tool_menu'}),
- dict(name='div', attrs={'id': 'comments_container'}),
- dict(name='div', attrs={'id': 'wrapper'})]
- keep_only_tags = [dict(name='h1', attrs={'class': ['headline']}),
- dict(name='div', attrs={'class': ['box_container']})]
-
- feeds = [(u'THN', u'http://www.thehockeynews.com/rss/all_categories.xml')]
diff --git a/recipes/tidbits.recipe b/recipes/tidbits.recipe
deleted file mode 100644
index d2e9af8574..0000000000
--- a/recipes/tidbits.recipe
+++ /dev/null
@@ -1,51 +0,0 @@
-
-__license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic '
-'''
-db.tidbits.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TidBITS(BasicNewsRecipe):
- title = 'TidBITS: Mac News for the Rest of Us'
- __author__ = 'Darko Miletic'
- description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
- publisher = 'TidBITS Publishing Inc.'
- category = 'news, Apple, Macintosh, IT, Internet'
- oldest_article = 2
- max_articles_per_feed = 100
- no_stylesheets = True
- encoding = 'utf-8'
- use_embedded_content = False
- language = 'en'
- remove_empty_feeds = True
- masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
- extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- keep_only_tags = [dict(name='div', attrs={'id': 'center_ajax_sub'})]
- remove_tags = [dict(name='div', attrs={'id': 'social-media'})]
-
- feeds = [
-
- (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'),
- (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'),
- (u'External Links', u'http://db.tidbits.com/feeds/links.rss'),
- (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'),
- (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'),
- (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'),
- (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'),
- (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'),
- (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'),
- (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'),
- (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'),
- (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'),
- (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'),
- (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'),
- (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss')
- ]
diff --git a/recipes/tijolaco.recipe b/recipes/tijolaco.recipe
deleted file mode 100644
index bd2200c172..0000000000
--- a/recipes/tijolaco.recipe
+++ /dev/null
@@ -1,25 +0,0 @@
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-
-class Tijolaco(BasicNewsRecipe):
- title = u'Tijolaco.com'
- __author__ = u'Diniz Bortolotto'
- description = u'Posts do Blog Tijola\xe7o.com'
- oldest_article = 7
- max_articles_per_feed = 50
- encoding = 'utf8'
- publisher = u'Brizola Neto'
- category = 'politics, Brazil'
- language = 'pt_BR'
- publication_type = 'politics portal'
- use_embedded_content = False
- no_stylesheets = True
- remove_javascript = True
-
- feeds = [(u'Blog Tijola\xe7o.com', u'http://feeds.feedburner.com/Tijolacoblog')]
-
- reverse_article_order = True
-
- keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
-
- remove_tags = [dict(name='span', attrs={'class': 'com'})]
diff --git a/recipes/time_turk.recipe b/recipes/time_turk.recipe
deleted file mode 100644
index 4a47fdee3b..0000000000
--- a/recipes/time_turk.recipe
+++ /dev/null
@@ -1,14 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class BasicUserRecipe1325259641(BasicNewsRecipe):
- language = 'tr'
- __author__ = 'asalet_r'
- title = u'TimeT\xfcrk'
- oldest_article = 7
- max_articles_per_feed = 100
- auto_cleanup = True
-
- feeds = [(u'TimeT\xfcrk', u'http://www.timeturk.com/tr/rss/')]
diff --git a/recipes/timesnewroman.recipe b/recipes/timesnewroman.recipe
deleted file mode 100644
index 8ce8a56f6f..0000000000
--- a/recipes/timesnewroman.recipe
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL v3'
-__copyright__ = u'2011, Silviu Cotoar\u0103'
-'''
-timesnewroman.ro
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TimesNewRoman(BasicNewsRecipe):
- title = u'Times New Roman'
- __author__ = u'Silviu Cotoar\u0103'
- description = u'Cotidian independent de umor voluntar'
- publisher = u'Times New Roman'
- oldest_article = 25
- language = 'ro'
- max_articles_per_feed = 100
- no_stylesheets = True
- use_embedded_content = False
- category = 'Ziare,Reviste,Fun'
- encoding = 'utf-8'
- cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'
-
- conversion_options = {
- 'comments': description, 'tags': category, 'language': language, 'publisher': publisher
- }
-
- keep_only_tags = [
- dict(name='div', attrs={'id': 'page'})
- ]
-
- remove_tags = [
- dict(name='p', attrs={'class': ['articleinfo']}), dict(name='div', attrs={'class': ['shareTools']}), dict(
- name='div', attrs={'class': 'fb_iframe_widget'}), dict(name='div', attrs={'id': 'jc'})
- ]
-
- remove_tags_after = [
- dict(name='div', attrs={'class': 'fb_iframe_widget'}),
- dict(name='div', attrs={'id': 'jc'})
- ]
-
- feeds = [
- (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss')
- ]
-
- def preprocess_html(self, soup):
- return self.adeify_images(soup)
diff --git a/recipes/tnxm.recipe b/recipes/tnxm.recipe
deleted file mode 100644
index f2b84ca7a0..0000000000
--- a/recipes/tnxm.recipe
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Wasabi '
-'''
-tnxm.net
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TNXM(BasicNewsRecipe):
- title = u'Thanh Nien Xa Me'
- __author__ = 'Wasabi'
- description = 'Vietnam news and current affairs from TNXM - the finest Vietnamese bulletin board.'
- no_stylesheets = True
- language = 'vi'
-
- encoding = 'utf-8'
- recursions = 0
-
- remove_tags = [dict(name='div', attrs={'class': 'footer'})]
- extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
-
- feeds = [
- ('Index', 'http://tnxm.net/external.php?type=RSS'),
- ]
-
- def print_version(self, url):
- return url.replace('showthread.php?', 'printthread.php?pp=160&')
diff --git a/recipes/today_online.recipe b/recipes/today_online.recipe
deleted file mode 100644
index bce8d75c5b..0000000000
--- a/recipes/today_online.recipe
+++ /dev/null
@@ -1,63 +0,0 @@
-from calibre.ptempfile import PersistentTemporaryFile
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class AdvancedUserRecipe1276486274(BasicNewsRecipe):
- title = u'Today Online - Singapore'
- publisher = 'MediaCorp Press Ltd - Singapore'
- __author__ = 'rty'
- category = 'news, Singapore'
- oldest_article = 7
- max_articles_per_feed = 100
- remove_javascript = True
- use_embedded_content = False
- no_stylesheets = True
- language = 'en_SG'
- temp_files = []
- articles_are_obfuscated = True
- masthead_url = 'http://www.todayonline.com/sites/all/themes/today/logo.png'
- conversion_options = {'linearize_tables': True}
- extra_css = '''
- .author{font-style: italic; font-size: small}
- .date{font-style: italic; font-size: small}
- .Headline{font-weight: bold; font-size: xx-large}
- .headerStrap{font-weight: bold; font-size: x-large; font-syle: italic}
- .bodyText{font-size: 4px;font-family: Times New Roman;}
- '''
- feeds = [
- (u'Hot News', u'http://www.todayonline.com/hot-news/feed'),
- (u'Singapore', u'http://www.todayonline.com/feed/singapore'),
- (u'World', u'http://www.todayonline.com/feed/world'),
- (u'Business', u'http://www.todayonline.com/feed/business'),
- (u'Tech', u'http://www.todayonline.com/feed/tech'),
- (u'Voices', u'http://www.todayonline.com/feed/voices'),
- (u'Commentary', u'http://www.todayonline.com/feed/Commentary'),
- (u'Daily Focus', u'http://www.todayonline.com/feed/daily-focus'),
- (u'Lifestyle', u'http://www.todayonline.com/feed/lifestyle'),
- ]
- keep_only_tags = [
- dict(name='div', attrs='print-content')
- ]
-
- remove_tags = [
- dict(name='div', attrs={'class': ['url', 'button']}),
- dict(name='div', attrs={'class': 'node-type-print-edition'}),
- dict(name='div', attrs={'class': ['field field-name-field-article-section field-type-taxonomy-term-reference field-label-hidden',
- 'field field-name-field-article-abstract field-type-text-long field-label-hidden', 'authoring']})
-
- ]
-
- def get_obfuscated_article(self, url):
- br = self.get_browser()
- br.open(url)
- response = br.follow_link(url_regex=r'/print/', nr=0)
- html = response.read()
- self.temp_files.append(PersistentTemporaryFile('_fa.html'))
- self.temp_files[-1].write(html)
- self.temp_files[-1].close()
- return self.temp_files[-1].name
-
- def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- return soup
diff --git a/recipes/todays_zaman.recipe b/recipes/todays_zaman.recipe
deleted file mode 100644
index 3058fb5083..0000000000
--- a/recipes/todays_zaman.recipe
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-
-__license__ = 'GPL v3'
-__copyright__ = '2014, spswerling'
-'''
-www.todayszaman.com
-'''
-import re
-
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-try:
- from urllib.parse import urljoin
-except ImportError:
- from urlparse import urljoin
-
-
-class TodaysZaman(BasicNewsRecipe):
-
- title = u'Todays Zaman'
- __author__ = u'spswerling'
- description = 'English version of Turkish Daily "Zaman"'
- max_articles_per_feed = 100
- encoding = 'utf-8'
- category = 'news'
- language = 'en_TR'
- publication_type = 'newspaper'
- cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/todays_yenilogo.bmp' # yep, bmp
- masthead_url = cover_img_url
- remove_empty_feeds = True
-
- # on kindle, images can make things kind of fat. Slim them down.
- recursions = 0
- oldest_article = 1.5
- compress_news_images = True
- compress_news_images_max_size = 7
- scale_news_images = (150, 200) # (kindle touch: 600x800)
- useHighResImages = False
-
- sections = [
- (u'Columnists', u'columnists'),
- (u'Opinion', u'op-ed'),
- (u'World', u'world'),
- (u'National', u'national'),
- (u'Diplomacy', u'diplomacy'),
- (u'Business', u'business'),
- ]
-
- # util for creating remove_tags and keep_tags style regex matchers
- def tag_matcher(elt, attr, str):
- return dict(name=elt, attrs={attr: re.compile(str, re.IGNORECASE)})
-
- keep_only_tags = [
- tag_matcher('div', 'class', '^pageNewsDetailContainer$'),
- tag_matcher('div', 'class', '^pageColumnistDetailContainer$'),
- ]
-
- remove_tags = [
- tag_matcher('div', 'class', 'DetailKeyword'),
- tag_matcher('div', 'class', 'MainContentSocial'),
- tag_matcher('div', 'class', 'SocialNetwork'),
- tag_matcher('div', 'class', 'DetailLeftOther'),
- tag_matcher('div', 'class', 'RelatedNews'),
- tag_matcher('div', 'class', '^topMenuWrapper$'),
- tag_matcher('div', 'class', '^logo$'),
- tag_matcher('a', 'class', 'cf_email'),
- ]
- articles = {}
-
- def parse_index(self):
- for (sect_title, sect_uri) in self.sections:
- self.parse_section(sect_title, sect_uri)
-
- ans = []
- for k in self.articles:
- ans.append((k, self.articles[k]))
- return ans
-
- def parse_section(self, sect_title, sect_uri):
- url = 'http://www.todayszaman.com/' + sect_uri
- print('Start section ' + sect_title + ', ' + url)
- try:
- soup = self.index_to_soup(url)
- except:
- return
-
- # Find each article
- for div in soup.findAll('div'):
- div_class = div.get('class')
- if div_class:
- if div_class in ['pageColumnistsMainContent',
- 'pageCategoryContainer']:
- # print ' DIVCLASS' + div_class
- for link in div.findAll('a', href=True):
- self.process_link(sect_title, div_class, link)
-
- print('Finished section: ' + sect_title)
-
- def process_link(self, section_title, layout, link):
- def p(s):
- print('[PROCESS LINK] ' + s[0:80])
-
- href = link['href']
- full_href = urljoin('http://www.todayszaman.com/', href)
- next_sib = link.nextSibling
- child_h2 = link.find('h2')
- link_text = self.tag_to_string(link).strip()
- title_node = None
-
- if layout in ['pageColumnistsMainContent']:
- if child_h2:
- title_node = child_h2
- else:
- return
- elif layout in ['pageCategoryContainer']:
- top_title = link.find(attrs={'class': 'pageCategoryTopTitle'})
- if top_title:
- title_node = top_title
- elif (not link_text) and (next_sib and next_sib.find('h4')):
- title_node = next_sib.find('h4')
- elif (not link_text) and (next_sib and next_sib.find('h3')):
- title_node = next_sib.find('h3')
- elif link_text:
- title_node = link
-
- if title_node:
- title = self.tag_to_string(title_node)
- # print ' BING: ' + href + ', ' + title
- self.queue_article_link(section_title, full_href, title)
-
- def queue_article_link(self, section, url, title):
- if section not in self.articles:
- self.articles[section] = []
- self.articles[section].append(
- dict(title=title,
- url=url,
- date='',
- description='',
- author='',
- content=''))
-
- def populate_article_metadata(self, article, soup, first):
-
- def p(s):
- print('[POPULATE METADATA] ' + s[0:80])
-
- tnode = soup.find('title')
- if tnode:
- tstring = self.tag_to_string(tnode)
- if ' - ' in tstring:
- author = tstring.split('-')[0]
- if author:
- article.author = author
- article.title = author + ' - ' + article.title.strip()
- p('Add author to title:' + author)
-
- # known matches: pageNewsDetailDate, pageColumnistDetailLeftDate
- regex = re.compile('(DetailDate|DetailLeftDate)$', re.IGNORECASE)
- date_node = soup.find('div', {'class': regex})
- if date_node:
- date = self.tag_to_string(date_node).__str__().split('/')[0]
- date = ','.join(date.split(',')[:2]).strip()
- article.title = date + ' - ' + article.title.strip()
- article.date = date
- p('Add date to title: ' + date)
-
- strong = soup.find('strong')
- if strong:
- article.text_summary = self.tag_to_string(strong)
- p('Summary: ' + article.text_summary)
-
- def _dbg_soup_node(self, node):
- s = ' cls: ' + node.get('class').__str__().strip() + \
- ' txt: ' + self.tag_to_string(node).strip()
- return s
diff --git a/recipes/tomshardware.recipe b/recipes/tomshardware.recipe
deleted file mode 100644
index f8e4ef959e..0000000000
--- a/recipes/tomshardware.recipe
+++ /dev/null
@@ -1,73 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008-2013, Darko Miletic '
-'''
-tomshardware.com/us
-'''
-
-try:
- from urllib.parse import urlencode
-except ImportError:
- from urllib import urlencode
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-
-class Tomshardware(BasicNewsRecipe):
- title = "Tom's Hardware US"
- __author__ = 'Darko Miletic'
- description = 'Hardware reviews and News'
- publisher = "Tom's Hardware"
- category = 'news, IT, hardware, USA'
- no_stylesheets = True
- needs_subscription = 'optional'
- language = 'en'
- INDEX = 'http://www.tomshardware.com'
- LOGIN = INDEX + '/membres/'
- remove_javascript = True
- use_embedded_content = False
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- def get_browser(self):
- br = BasicNewsRecipe.get_browser(self)
- br.open(self.INDEX + '/us/')
- if self.username is not None and self.password is not None:
- data = urlencode({'action': 'login_action', 'r': self.INDEX + '/us/', 'login': self.username, 'mdp': self.password
- })
- br.open(self.LOGIN, data)
- return br
-
- remove_tags = [
- dict(name='div', attrs={'id': 'header'}), dict(name='object')
- ]
-
- feeds = [
-
- (u'Reviews', u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-2.xml'),
- (u'News', u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-1.xml')
- ]
-
- def print_version(self, url):
- main, sep, rest = url.rpartition('.html')
- rmain, rsep, article_id = main.rpartition(',')
- tmain, tsep, trest = rmain.rpartition('/reviews/')
- rind = 'http://www.tomshardware.com/news_print.php?p1='
- if tsep:
- rind = 'http://www.tomshardware.com/review_print.php?p1='
- return rind + article_id
-
- def cleanup_image_tags(self, soup):
- for item in soup.findAll('img'):
- for attrib in ['height', 'width', 'border', 'align']:
- item[attrib] = ''
- del item[attrib]
- return soup
-
- def preprocess_html(self, soup):
- del(soup.body['onload'])
- for item in soup.findAll(style=True):
- del item['style']
- for it in soup.findAll('span'):
- it.name = "div"
- return self.cleanup_image_tags(soup)
diff --git a/recipes/tomshardware_de.recipe b/recipes/tomshardware_de.recipe
deleted file mode 100644
index 140b9a2a91..0000000000
--- a/recipes/tomshardware_de.recipe
+++ /dev/null
@@ -1,58 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal '
-
-'''
-Fetch tomshardware.
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class cdnet(BasicNewsRecipe):
-
- title = 'tomshardware'
- description = 'computer news in german'
- __author__ = 'Oliver Niesner'
- use_embedded_content = False
- timefmt = ' [%d %b %Y]'
- max_articles_per_feed = 50
- no_stylesheets = True
- encoding = 'utf-8'
- language = 'de'
-
- remove_tags = [dict(id='outside-advert'),
- dict(id='advertRightWhite'),
- dict(id='header-advert'),
- dict(id='header-banner'),
- dict(id='header-menu'),
- dict(id='header-top'),
- dict(id='header-tools'),
- dict(id='nbComment'),
- dict(id='commentTools'),
- dict(id='internalSidebar'),
- dict(id='header-news-infos'),
- dict(id='header-news-tools'),
- dict(id='breadcrumbs'),
- dict(id='emailTools'),
- dict(id='bookmarkTools'),
- dict(id='printTools'),
- dict(id='header-nextNews'),
- dict(id='commentsBox'),
- dict(id='showComments'),
- dict(id='footer'),
- dict(id=''),
- dict(name='div', attrs={'class': 'pyjama'}),
- dict(name='div', attrs={'class': 'basicCentral'}),
- dict(name='li', attrs={
- 'class': 'simplePagination-previous'}),
- dict(name='form', attrs={'id': 'commentForm'}),
- dict(name='href', attrs={'class': 'comment'}),
- dict(name='div', attrs={'class': 'greyBoxR clearfix'}),
- dict(name='div', attrs={'class': 'greyBoxL clearfix'}),
- dict(name='div', attrs={'class': 'greyBox clearfix'}),
- dict(name='div', attrs={'class': 'labelized'}),
- dict(id='')]
- remove_tags_after = [dict(name='div', attrs={'class': 'labelized'})]
-
- feeds = [
- ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml')]
diff --git a/recipes/tomshardware_it.recipe b/recipes/tomshardware_it.recipe
deleted file mode 100644
index f366400055..0000000000
--- a/recipes/tomshardware_it.recipe
+++ /dev/null
@@ -1,26 +0,0 @@
-__license__ = 'GPL v3'
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class AdvancedUserRecipe1327434170(BasicNewsRecipe):
- title = u"Tom's Hardware"
- oldest_article = 7
- max_articles_per_feed = 100
- auto_cleanup = True
- masthead_url = 'http://userlogos.org/files/logos/spaljeni/tomshardwre.png'
-
- def get_article_url(self, article):
- link = BasicNewsRecipe.get_article_url(self, article)
- if link.split('/')[-1] == "story01.htm":
- link = link.split('/')[-2]
- a = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L', 'N', 'S']
- b = ['0', '.', '/', '?', '-', '=', '&',
- '_', 'http://', '.com', 'www.']
- for i in range(0, len(a)):
- link = link.replace('0' + a[-i], b[-i])
- return link
- feeds = [
- (u"Tom's Hardware", u'http://rss.feedsportal.com/c/32604/f/531080/index.rss')]
- __author__ = 'faber1971'
- description = 'Italian website on technology - v1.00 (28, January 2012)'
- language = 'it'
diff --git a/recipes/toronto_sun.recipe b/recipes/toronto_sun.recipe
deleted file mode 100644
index 7ecacd1f24..0000000000
--- a/recipes/toronto_sun.recipe
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
-'''
-www.torontosun.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TorontoSun(BasicNewsRecipe):
- title = 'Toronto SUN'
- __author__ = 'Darko Miletic and Sujata Raman'
- description = 'News from Canada'
- publisher = 'Toronto Sun'
- category = 'news, politics, Canada'
- oldest_article = 2
- max_articles_per_feed = 100
- no_stylesheets = True
- use_embedded_content = False
- encoding = 'cp1252'
- language = 'en_CA'
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- keep_only_tags = [
- dict(name='div', attrs={'class': ['articleHead', 'leftBox']}), dict(name='div', attrs={
- 'id': 'channelContent'}), dict(name='div', attrs={'id': 'rotateBox'}), dict(name='img')
- ]
- remove_tags = [
- dict(name='div', attrs={'class': ['bottomBox clear', 'bottomBox', 'breadCrumb', 'articleControls thin', 'articleControls thin short', 'extraVideoList']}), dict(name='h2', attrs={'class': 'microhead'}), dict(name='div', attrs={'id': 'commentsBottom'}), dict(name=['link', 'iframe', 'object']), dict(name='a', attrs={'rel': 'swap'}), dict(name='a', attrs={'href': '/news/haiti/'}), dict(name='ul', attrs={'class': ['tabs dl contentSwap', 'micrositeNav clearIt hList', 'galleryNav rotateNav']}) # noqa
- ]
-
- remove_tags_after = [
- dict(name='div', attrs={'class': 'bottomBox clear'}), dict(name='div', attrs={
- 'class': 'rotateBox'}), dict(name='div', attrs={'id': 'contentSwap'})
- ]
-
- extra_css = '''
- h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
- h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
- h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
- p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
- .bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
- .subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
- .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
- .byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
- .updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
- .galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
- .galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
- '''
-
- feeds = [
-
- (u'News', u'http://www.torontosun.com/news/rss.xml'),
- (u'Canada', u'http://www.torontosun.com/news/canada/rss.xml'),
- (u'Columnists', u'http://www.torontosun.com/news/columnists/rss.xml'),
- (u'World', u'http://www.torontosun.com/news/world/rss.xml'),
- (u'Money', u'http://www.torontosun.com/money/rss.xml')
- ]
-
- def preprocess_html(self, soup):
- # To fetch images from the specified source
- for img in soup.findAll('img', src=True):
- url = img.get('src').split('?')[-1].partition('=')[-1]
- if url:
- img['src'] = url.split('&')[0].partition('=')[0]
- img['width'] = url.split(
- '&')[-1].partition('=')[-1].split('x')[0]
- img['height'] = url.split(
- '&')[-1].partition('=')[-1].split('x')[1]
- return soup
diff --git a/recipes/toyokeizai.recipe b/recipes/toyokeizai.recipe
deleted file mode 100644
index e6a5c30ced..0000000000
--- a/recipes/toyokeizai.recipe
+++ /dev/null
@@ -1,67 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura '
-'''
-www.toyokeizai.net
-'''
-
-import re
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Toyokeizai(BasicNewsRecipe):
- title = u'ToyoKeizai News'
- __author__ = 'Hiroshi Miura'
- oldest_article = 1
- max_articles_per_feed = 50
- description = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
- publisher = 'Toyokeizai Shinbun Sha'
- category = 'economy, magazine, japan'
- language = 'ja'
- encoding = 'euc-jp'
- index = 'http://member.toyokeizai.net/news/'
- remove_javascript = True
- no_stylesheets = True
- masthead_title = u'TOYOKEIZAI'
- needs_subscription = True
- timefmt = '[%y/%m/%d]'
- recursions = 5
- match_regexps = [r'page/\d+']
-
- keep_only_tags = [
- dict(name='div', attrs={'class': ['news']}),
- dict(name='div', attrs={'class': ["news_cont"]}),
- dict(name='div', attrs={'class': ["news_con"]}),
- # dict(name='div', attrs={'class':["norightsMessage"]})
- ]
- remove_tags = [{'class': "mt35 mgz"},
- {'class': "mt20 newzia"},
- {'class': "mt20 fontS"},
- {'class': "bk_btn_m"},
- dict(id='newzia_connect_member')
- ]
-
- def parse_index(self):
- feeds = []
- soup = self.index_to_soup(self.index)
- topstories = soup.find('ul', attrs={'class': 'list6'})
- if topstories:
- newsarticles = []
- for itt in topstories.findAll('li'):
- itema = itt.find('a', href=True)
- itemd = itt.find('span')
- newsarticles.append({
- 'title': itema.string, 'date': re.compile(r"\- ").sub("", itemd.string), 'url': 'http://member.toyokeizai.net' + itema['href'], 'description': itema['title'] # noqa
- })
- feeds.append(('news', newsarticles))
- return feeds
-
- def get_browser(self):
- br = BasicNewsRecipe.get_browser(self)
- if self.username is not None and self.password is not None:
- br.open('http://member.toyokeizai.net/norights/form/')
- br.select_form(nr=0)
- br['kaiin_id'] = self.username
- br['password'] = self.password
- br.submit()
- return br
diff --git a/recipes/tpm_uk.recipe b/recipes/tpm_uk.recipe
deleted file mode 100644
index 963855cca3..0000000000
--- a/recipes/tpm_uk.recipe
+++ /dev/null
@@ -1,41 +0,0 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010-2015, Darko Miletic '
-'''
-www.philosophersmag.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TPM_uk(BasicNewsRecipe):
- title = "The Philosophers' Magazine"
- __author__ = 'Darko Miletic'
- description = 'Title says it all'
- publisher = "The Philosophers' Magazine"
- category = 'philosophy, news'
- oldest_article = 80
- max_articles_per_feed = 200
- no_stylesheets = True
- encoding = 'utf8'
- use_embedded_content = False
- language = 'en_GB'
- remove_empty_feeds = True
- publication_type = 'magazine'
- extra_css = """
- body{font-family: Raleway,sans-serif }
- """
-
- conversion_options = {
- 'comment': description, 'tags': category, 'publisher': publisher, 'language': language
- }
-
- remove_tags = [
- dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object', 'img'])]
- keep_only_tags = [
- dict(attrs={'class': ['article-title', 'article-content']})]
-
- feeds = [
-
- (u'Articles', u'http://www.philosophersmag.com/index.php/tpm-mag-articles?format=feed&type=rss'),
- (u'Reflections', u'http://www.philosophersmag.com/index.php/reflections?format=feed&type=rss')
- ]
diff --git a/recipes/tri_city_herald.recipe b/recipes/tri_city_herald.recipe
deleted file mode 100644
index a1cda789a6..0000000000
--- a/recipes/tri_city_herald.recipe
+++ /dev/null
@@ -1,27 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TriCityHeraldRecipe(BasicNewsRecipe):
- title = u'Tri-City Herald'
- description = 'The Tri-City Herald Mid-Columbia.'
- language = 'en'
- __author__ = 'Laura Gjovaag'
- oldest_article = 1.5
- max_articles_per_feed = 100
- no_stylesheets = True
- remove_javascript = True
- keep_only_tags = [
- dict(name='div', attrs={'id': 'story_header'}),
- dict(name='img', attrs={'class': 'imageCycle'}),
- dict(name='div', attrs={'id': ['cycleImageCaption', 'story_body']})
- ]
- remove_tags = [
- dict(name='div', attrs={'id': 'story_mlt'}),
- dict(name='a', attrs={'id': 'commentCount'}),
- dict(name=['script', 'noscript', 'style'])]
- extra_css = 'h1{font: bold 140%;} #cycleImageCaption{font: monospace 60%}'
-
- feeds = [
- (u'Tri-City Herald Mid-Columbia',
- u'http://www.tri-cityherald.com/901/index.rss')
- ]
diff --git a/recipes/trojmiasto_pl.recipe b/recipes/trojmiasto_pl.recipe
deleted file mode 100644
index 73d679aaf9..0000000000
--- a/recipes/trojmiasto_pl.recipe
+++ /dev/null
@@ -1,57 +0,0 @@
-import re
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Trojmiasto(BasicNewsRecipe):
- title = u'Tr\xf3jmiasto.pl'
- __author__ = 'fenuks'
- description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl' # noqa
- category = ''
- language = 'pl'
- encoding = 'utf-8'
- extra_css = 'ul {list-style: none; padding:0; margin:0;}'
- cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif'
- use_embedded_content = False
- oldest_article = 7
- max_articles_per_feed = 100
- no_stylesheets = True
- remove_empty_feeds = True
- remove_javascript = True
- remove_attributes = ['style', 'font']
- ignore_duplicate_articles = {'title', 'url'}
-
- preprocess_regexps = [(re.compile(u'Czytaj więcej.*?