Merge branch 'master' of https://github.com/unkn0w7n/calibre
BIN
recipes/icons/sonar21.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 833 B |
Before Width: | Height: | Size: 761 B |
Before Width: | Height: | Size: 489 B |
Before Width: | Height: | Size: 1.0 KiB |
Before Width: | Height: | Size: 977 B |
Before Width: | Height: | Size: 269 B |
Before Width: | Height: | Size: 1.4 KiB |
Before Width: | Height: | Size: 484 B |
Before Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 403 B |
Before Width: | Height: | Size: 917 B |
Before Width: | Height: | Size: 590 B |
Before Width: | Height: | Size: 353 B |
Before Width: | Height: | Size: 936 B |
Before Width: | Height: | Size: 1.5 KiB |
Before Width: | Height: | Size: 688 B |
Before Width: | Height: | Size: 672 B |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 671 B |
Before Width: | Height: | Size: 2.3 KiB |
Before Width: | Height: | Size: 449 B |
Before Width: | Height: | Size: 370 B |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 925 B |
Before Width: | Height: | Size: 753 B |
Before Width: | Height: | Size: 890 B |
Before Width: | Height: | Size: 878 B |
Before Width: | Height: | Size: 681 B |
Before Width: | Height: | Size: 328 B |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 350 B |
Before Width: | Height: | Size: 251 B |
Before Width: | Height: | Size: 258 B |
Before Width: | Height: | Size: 586 B |
Before Width: | Height: | Size: 645 B |
Before Width: | Height: | Size: 296 B |
Before Width: | Height: | Size: 560 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 642 B |
Before Width: | Height: | Size: 418 B |
Before Width: | Height: | Size: 174 B |
Before Width: | Height: | Size: 576 B |
Before Width: | Height: | Size: 685 B |
Before Width: | Height: | Size: 685 B |
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 415 B |
Before Width: | Height: | Size: 1.5 KiB |
Before Width: | Height: | Size: 614 B |
Before Width: | Height: | Size: 330 B |
Before Width: | Height: | Size: 297 B |
Before Width: | Height: | Size: 336 B |
Before Width: | Height: | Size: 1.8 KiB |
Before Width: | Height: | Size: 74 B |
Before Width: | Height: | Size: 205 B |
Before Width: | Height: | Size: 159 B |
Before Width: | Height: | Size: 903 B |
Before Width: | Height: | Size: 822 B |
Before Width: | Height: | Size: 1.1 KiB |
Before Width: | Height: | Size: 153 B |
Before Width: | Height: | Size: 768 B |
Before Width: | Height: | Size: 282 B |
Before Width: | Height: | Size: 317 B |
BIN
recipes/icons/unz.png
Normal file
After Width: | Height: | Size: 494 B |
42
recipes/sonar21.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||
|
||||
|
||||
class Sonar21(BasicNewsRecipe):
    """Recipe that builds an issue from the Sonar21 RSS feed.

    The article age limit (``oldest_article``) can be overridden at run time
    through the ``days`` recipe-specific option.
    """

    # Identity
    __author__ = 'unkn0wn'
    title = 'Sonar21'
    language = 'en_US'
    encoding = 'utf-8'

    # Artwork
    masthead_url = 'https://sonar21.com/wp-content/uploads/2024/10/logo_999999_720x216.png'
    cover_url = 'https://sonar21.com/wp-content/uploads/2024/09/sonar21_backplate_vertical.jpg'

    # Download behaviour
    feeds = ['https://sonar21.com/feed']
    oldest_article = 7  # days; also the default shown for the 'days' option below
    max_articles_per_feed = 100
    use_embedded_content = False
    browser_type = 'webengine'

    # Clean-up and styling of the fetched pages
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    extra_css = '.entry-meta, .wp-element-caption, .wp-block-image { font-size: small; }'
    keep_only_tags = [classes('entry-header entry-content')]
    remove_tags = [
        dict(name=['iframe', 'svg']),
        classes('addtoany_share_save_container wpd-avatar'),
    ]

    # User-tunable options; 'default' must be a string, hence str().
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }

    def __init__(self, *args, **kwargs):
        """Initialise the base recipe, then apply the ``days`` option if set.

        ``oldest_article`` is replaced only when the option value is a
        non-empty string; it is converted with ``float``.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
|
@ -6,6 +6,7 @@
|
||||
# Copyright: Nathan Cook (nathan.cook@gmail.com)
|
||||
##
|
||||
# Written: 2020-12-18
|
||||
# Updated: 2024-11-04
|
||||
##
|
||||
|
||||
__license__ = 'GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html'
|
||||
@ -14,6 +15,7 @@ __version__ = 'v0.1.1'
|
||||
__date__ = '2020-12-19'
|
||||
__author__ = 'topynate'
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
@ -21,21 +23,36 @@ from mechanize import Request
|
||||
|
||||
|
||||
class Substack(BasicNewsRecipe):
|
||||
title = 'Substack'
|
||||
__author__ = 'topynate'
|
||||
title = 'Substack'
|
||||
__author__ = 'topynate, unkn0wn'
|
||||
description = 'Use advanced menu if you want to add your own substack handles.'
|
||||
oldest_article = 7
|
||||
language = 'en'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//*[@class="subtitle"]'
|
||||
needs_subscription = 'optional'
|
||||
use_embedded_content = False
|
||||
masthead_url = 'https://substack.com/img/substack_wordmark.png'
|
||||
cover_url = 'https://substack.com/img/substack.png'
|
||||
extra_css = '.captioned-image-container, .image-container {font-size: small;}'
|
||||
|
||||
recipe_specific_options = {
|
||||
'auths': {
|
||||
'short': 'enter the @handles you subscribe to:\nseperated by a space',
|
||||
'long': 'julianmacfarlane ianleslie .... ....',
|
||||
'default': 'julianmacfarlane ianleslie thesalvo',
|
||||
},
|
||||
'days': {
|
||||
'short': 'Oldest article to download from this news source. In days ',
|
||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||
'default': str(oldest_article)
|
||||
}
|
||||
'default': str(oldest_article),
|
||||
},
|
||||
'res': {
|
||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||
'default': '600',
|
||||
},
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -44,12 +61,12 @@ class Substack(BasicNewsRecipe):
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
|
||||
# The same URL provides either all posts, or all free posts + previews of paid posts,
|
||||
# depending on whether you're logged in.
|
||||
feeds = [
|
||||
('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
|
||||
]
|
||||
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
|
||||
# The same URL provides either all posts, or all free posts + previews of paid posts,
|
||||
# depending on whether you're logged in.
|
||||
# feeds = [
|
||||
# ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
|
||||
# ]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
@ -70,3 +87,24 @@ class Substack(BasicNewsRecipe):
|
||||
if res.getcode() != 200:
|
||||
raise ValueError('Login failed, check username and password')
|
||||
return br
|
||||
|
||||
def get_feeds(self):
    """Build the feed URL list from the ``auths`` recipe-specific option.

    The option is read from ``self.recipe_specific_options``. When it is a
    non-empty string it is split on whitespace and each handle becomes
    ``https://<handle>.substack.com/feed``. Handles may be written with or
    without a leading ``@``.

    Returns:
        A list of feed URL strings; empty when the option is missing, empty
        or not a string.
    """
    handles = self.recipe_specific_options.get('auths')
    if not (handles and isinstance(handles, str)):
        return []
    feeds = []
    for handle in handles.split():
        # Drop the '@' entirely: substituting a space (as before) produced
        # an invalid host name such as 'https:// name.substack.com/feed'.
        name = handle.replace('@', '')
        if name:  # a lone '@' would otherwise yield 'https://.substack.com/feed'
            feeds.append('https://' + name + '.substack.com/feed')
    return feeds
|
||||
|
||||
def preprocess_html(self, soup):
    """Rewrite image widths and strip unwanted elements from an article.

    The requested width comes from the ``res`` recipe-specific option and
    falls back to ``'600'`` when that option is not a non-empty string.
    Every ``w_<digits>`` token in an ``img`` ``src`` is replaced with the
    requested width. ``source`` and ``svg`` elements and elements with the
    ``button-wrapper`` class are removed.

    Returns:
        The same ``soup`` object, modified in place.
    """
    width = self.recipe_specific_options.get('res')
    if not (width and isinstance(width, str)):
        width = '600'

    for image in soup.findAll('img', attrs={'src': True}):
        image['src'] = re.sub(r'w_\d+', 'w_' + width, image['src'])

    for unwanted in soup.findAll(['source', 'svg']):
        unwanted.extract()

    for wrapper in soup.findAll(attrs={'class': ['button-wrapper']}):
        wrapper.extract()

    return soup
|
||||
|
@ -1,69 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Return ``findAll``-style keyword arguments matching any given class.

    ``classes`` is a space-separated string of CSS class names. The result
    is a dict whose ``attrs['class']`` entry is a predicate: given an
    element's class attribute value, it returns a truthy value when at least
    one of the requested names is present and a falsy value otherwise
    (including for ``None`` and the empty string).
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # Short-circuits on a missing/empty attribute, exactly like `x and ...`.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shares_a_class}}
|
||||
|
||||
|
||||
class TN(BasicNewsRecipe):
    """Taipei Times recipe that stitches multi-page articles together.

    Pagination links inside ``page`` elements are tagged during
    preprocessing so that ``is_link_wanted`` can follow them one level deep
    (``recursions = 1``); the pager itself is removed in postprocessing.
    """

    __author__ = 'Krittika Goyal'
    title = u'Taipei Times'
    language = 'en_TW'

    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    recursions = 1

    keep_only_tags = [
        dict(name='h1'),
        dict(name='h3', attrs={'class': 'a'}),
        classes('main_ipic reporter text page'),
    ]

    feeds = [
        ('Front Page', 'http://www.taipeitimes.com/xml/front.rss'),
        ('Editorials', 'http://www.taipeitimes.com/xml/editorials.rss'),
        ('Taiwan', 'http://www.taipeitimes.com/xml/taiwan.rss'),
        ('Features', 'http://www.taipeitimes.com/xml/feat.rss'),
        ('Business', 'http://www.taipeitimes.com/xml/biz.rss'),
        ('World', 'http://www.taipeitimes.com/xml/world.rss'),
        ('Sports', 'http://www.taipeitimes.com/xml/sport.rss'),
    ]

    def preprocess_html(self, soup, *a):
        """Mark pager links as followable and make their URLs absolute."""
        for pager in soup.findAll(**classes('page')):
            for link in pager.findAll('a', href=True):
                link['data-calibre-follow-link'] = '1'
                href = link['href']
                if href.startswith('/'):
                    link['href'] = 'http://www.taipeitimes.com' + href
        return soup

    def is_link_wanted(self, url, tag):
        """Return True only for marked pager links to page 2 or later.

        A link qualifies when its URL ends in ``/<digits>``, the tag carries
        the marker set in ``preprocess_html``, the link text starts with a
        digit, and the trailing number is greater than 1.
        """
        page = re.search(r'/(\d+)$', url)
        if page is None:
            return False
        if tag['data-calibre-follow-link'] != '1':
            return False
        if re.match(r'\d+', self.tag_to_string(tag)) is None:
            return False
        return int(page.group(1)) > 1

    def postprocess_html(self, soup, *a):
        """Remove the pager elements from the finished article."""
        for pager in soup.findAll(**classes('page')):
            pager.extract()
        return soup
|
||||
|
||||
# def parse_index(self):
|
||||
# return [(
|
||||
# 'Articles', [{
|
||||
# 'title':
|
||||
# 'test',
|
||||
# 'url':
|
||||
# 'http://www.taipeitimes.com/News/editorials/archives/2019/02/26/2003710411'
|
||||
# }]
|
||||
# )]
|
@ -1,32 +0,0 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class TaNea(BasicNewsRecipe):
    """Recipe for the Greek newspaper Ta Nea, built from its section feeds."""

    title = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'el'

    # Keep only the content between the 'print-body' and 'text' divs.
    remove_tags_before = dict(name='div', attrs={'id': 'print-body'})
    remove_tags_after = dict(name='div', attrs={'id': 'text'})

    # Section titles are Greek text. They were previously written as the raw
    # UTF-8 bytes of each word inside a text string (e.g. u'\xce\x95...'),
    # which yields mojibake rather than Greek letters; the escapes below are
    # the decoded code points of those same byte sequences.
    feeds = [
        # Ελλάδα
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        # Κόσμος
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        # Οικονομία
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        # Πολιτισμός
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        # Γνώμες
        (u'\u0393\u03bd\u03ce\u03bc\u03b5\u03c2',
         u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        # Ριπές
        (u'\u03a1\u03b9\u03c0\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        # Αιχμές
        (u'\u0391\u03b9\u03c7\u03bc\u03ad\u03c2',
         u'http://www.tanea.gr/default.asp?pid=81&la=1'),
    ]

    def print_version(self, url):
        """Map an article URL (pid=2) to its print view (pid=96).

        URLs that do not start with the pid=2 prefix are returned unchanged.
        """
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')
|
@ -1,46 +0,0 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class tanuki(BasicNewsRecipe):
    """Recipe for Tanuki, a Polish anime and manga portal."""

    __author__ = 'fenuks'
    title = u'Tanuki'
    description = u'Tanuki - portal o anime i mandze.'
    category = 'anime, manga'
    language = 'pl'
    encoding = 'utf-8'

    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    # NOTE(review): other recipes spell this option `auto_cleanup`; as written
    # this only creates an attribute named `autocleanup` - confirm the intent.
    autocleanup = True

    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'

    # Strip screenshot headings and the "see how others rated" link blocks.
    preprocess_regexps = [
        (
            re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL),
            lambda match: '',
        ),
        (
            re.compile(
                type(u'')(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'),
                re.DOTALL,
            ),
            lambda match: '',
        ),
    ]

    keep_only_tags = [
        dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}),
        dict(name='table', attrs={'summary': 'Technikalia'}),
        dict(attrs={'class': ['chaptername', 'copycat']}),
        dict(id='rightcolumn'),
        dict(attrs={'class': ['headn_tt', 'subtable']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'screen'}),
        dict(id='randomtoplist'),
        dict(attrs={'class': 'note'}),
    ]

    feeds = [
        (u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'),
        (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'),
        (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'),
        (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'),
        (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml'),
    ]

    def append_page(self, soup, appendtag):
        """Append the following pages of a multi-page text to ``appendtag``.

        Follows the ``nextarrow`` link page by page, inserting each page's
        first ``chaptername``/``copycat`` element and then its ``copycat``
        element at the end of ``appendtag``. Finally all ``nextarrow``
        elements are removed from ``appendtag``.

        NOTE(review): nothing in the visible class calls this method.
        """
        next_link = appendtag.find(attrs={'class': 'nextarrow'})
        while next_link:
            page = self.index_to_soup(
                'http://czytelnia.tanuki.pl' + next_link['href'])
            next_link = page.find(attrs={'class': 'nextarrow'})

            chapter = page.find(attrs={'class': ['chaptername', 'copycat']})
            appendtag.insert(len(appendtag.contents), chapter)

            footer = page.find(attrs={'class': 'copycat'})
            appendtag.insert(len(appendtag.contents), footer)

        # When no link was found above this matches nothing.
        for arrow in appendtag.findAll(attrs={'class': 'nextarrow'}):
            arrow.extract()
|
@ -1,73 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Lars Jacob jacob.lars at gmail.com'
|
||||
__docformat__ = 'restructuredtext de'
|
||||
|
||||
'''
|
||||
www.taz.de/digiabo
|
||||
'''
|
||||
import os
|
||||
import zipfile
|
||||
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
try:
|
||||
from urllib.error import HTTPError
|
||||
from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen
|
||||
except ImportError:
|
||||
from urllib2 import HTTPBasicAuthHandler, HTTPError, build_opener, install_opener, urlopen
|
||||
|
||||
|
||||
class TazDigiabo(BasicNewsRecipe):
    """Recipe that downloads the ready-made EPUB of the taz digital subscription.

    Instead of assembling articles, ``build_index`` fetches the publisher's
    EPUB with HTTP basic authentication and unpacks it into the output
    directory.
    """

    title = u'Taz Digiabo'
    description = u'Das EPUB DigiAbo der Taz'
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Lars Jacob'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover': True
    }

    def build_index(self):
        """Download and extract the current issue.

        Returns:
            Path of ``content.opf`` inside ``self.output_dir``.

        Raises:
            ValueError: if the server rejects the request with an HTTP error
                (reported to the user as a login failure).
        """
        domain = "http://dl.taz.de"
        url = domain + "/epub/"

        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(realm='TAZ-ABO',
                                  uri=url,
                                  user=self.username,
                                  passwd=self.password)
        opener = build_opener(auth_handler)
        install_opener(opener)

        try:
            f = urlopen(url)
        except HTTPError:
            self.report_progress(0, _('Can\'t login to download issue'))
            raise ValueError('Failed to login, check your username and'
                             ' password')

        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0, _('downloading epub'))
        try:
            tmp.write(f.read())
        finally:
            # Release the HTTP response and flush the temp file even if the
            # transfer fails part-way; previously the response was never
            # closed and the temp file was closed twice.
            f.close()
            tmp.close()

        # The context manager closes the archive handle, which used to leak.
        with zipfile.ZipFile(tmp.name, 'r') as zfile:
            self.report_progress(0, _('extracting epub'))
            zfile.extractall(self.output_dir)

        index = os.path.join(self.output_dir, 'content.opf')

        self.report_progress(1, _('epub downloaded and extracted'))

        return index
|
@ -1,15 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1327051385(BasicNewsRecipe):
    """Recipe for Tech Economy, an Italian technology website."""

    __author__ = 'faber1971'
    title = u'Tech Economy'
    description = 'Italian website on technology - v1.00 (28, January 2012)'
    language = 'it'
    masthead_url = 'http://www.techeconomy.it/wp-content/uploads/2012/01/Logo-TE9.png'

    feeds = [
        (u'Tech Economy', u'http://www.techeconomy.it/feed/'),
    ]
    oldest_article = 7
    max_articles_per_feed = 100

    auto_cleanup = True
    # Everything after the author box is dropped.
    remove_tags_after = [
        dict(name='div', attrs={'class': 'cab-author-name'}),
    ]
|
@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '14, January 2010'
|
||||
|
||||
'''
|
||||
http://www.techworld.com/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class techworld(BasicNewsRecipe):
    """Recipe for TechWorld (IDG), built from the site's section RSS feeds."""

    title = 'TechWorld'
    __author__ = 'Lorenzo Vigentini'
    publisher = 'IDG Communication'
    description = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'  # noqa
    category = ('Apple, Mac, video, computing, product reviews, '
                'editing, cameras, production')
    language = 'en'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 7
    max_articles_per_feed = 15
    use_embedded_content = False
    # NOTE(review): another recipe in this changeset uses `recursions` for
    # link following; confirm whether `recursion` has any effect.
    recursion = 10

    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'News', u'http://www.techworld.com/news/rss'),
        (u'Tutorial', u'http://www.techworld.com/tutorial/rss'),
        (u'Reviews', u'http://www.techworld.com/review/rss'),
        (u'Features', u'http://www.techworld.com/features/rss'),
        (u'Analysis', u'http://www.techworld.com/analysis/rss'),
        (u'Galleries', u'http://www.techworld.com/picture-gallery/rss'),
        (u'TechWorld Blogs', u'http://www.techworld.com/blog/rss'),
    ]
|
@ -1,31 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TechnologyReview(BasicNewsRecipe):
    """Recipe for MIT Technology Review, built from its topic RSS feeds."""

    title = u'Technology Review'
    __author__ = 'rty'
    description = 'MIT Technology Magazine (from RSS feeds)'
    publisher = 'Technology Review Inc.'
    category = 'Technology, Innovation, R&D'
    language = 'en'
    oldest_article = 14
    max_articles_per_feed = 100
    # Was spelled `No_stylesheets`; attribute names are case-sensitive, so the
    # mis-capitalised name merely created an unrelated attribute and the
    # intended option was never set.
    no_stylesheets = True
    auto_cleanup = True
    extra_css = """
        .ArticleBody {font: normal; text-align: justify}
        .headline {font: bold x-large}
        .subheadline {font: italic large}
    """
    feeds = [
        (u'Computing',
         u'http://feeds.technologyreview.com/technology_review_Computing'),
        (u'Energy',
         u'http://feeds.technologyreview.com/technology_review_Energy'),
        (u'Materials',
         u'http://feeds.technologyreview.com/technology_review_Materials'),
        (u'Biomedicine',
         u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business',
         u'http://feeds.technologyreview.com/technology_review_Biztech'),
    ]
|
@ -1,69 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Anton Gillert <atx at binaryninja.de>'
|
||||
|
||||
'''
|
||||
Technology Review (deutsch) - heise.de/tr
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TechnologyReviewDe(BasicNewsRecipe):
    """Recipe for the German edition of Technology Review (heise.de/tr)."""

    __author__ = 'Anton Gillert, schuster'
    title = 'Technology Review'
    description = 'Technology news from Germany'
    language = 'de'
    masthead_url = 'http://1.f.ix.de/imgs/02/3/0/8/5/2/8/tr_logo-544bd18881c81263.png'

    feeds = [
        ('News', 'http://www.heise.de/tr/rss/news-atom.xml'),
        ('Blog', 'http://www.heise.de/tr/rss/blog-atom.xml'),
    ]
    oldest_article = 14
    max_articles_per_feed = 50
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='article')]
    remove_tags = [
        dict(name='nav'),
        dict(name='figure', attrs={'class': 'logo'}),
        dict(name='hr'),
    ]
    extra_css = (
        '.bild_zentriert {font-size: 0.6em} '
        '.source {font-size: 0.6em}'
    )

    def get_cover_url(self):
        """Look up the current magazine cover on the heise.de overview page.

        ``self.cover_url`` is reset to ``''`` first and stays empty when no
        matching image is found; the stored value is returned.
        """
        self.cover_url = ''
        overview = self.index_to_soup('http://www.heise.de/tr/magazin/')
        title_pattern = re.compile('Titelbild Technology Review')
        cover = overview.find('img', alt=title_pattern, src=True)
        if cover:
            self.cover_url = 'http://www.heise.de' + cover['src']
        return self.cover_url

    def print_version(self, url):
        """Return the URL of the print view of an article."""
        return url + '?view=print'

    def preprocess_html(self, soup):
        """Drop inline styles and the paragraph naming the article URL."""
        for styled in soup.findAll(attrs={'style': True}):
            del styled['style']
        for paragraph in soup.findAll('p'):
            text = self.tag_to_string(paragraph)
            if 'URL dieses Artikels:' in text:
                paragraph.extract()
        return soup
|
@ -1,64 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TechTarget(BasicNewsRecipe):
    """Recipe for TechTarget's IT infrastructure blogs (login required)."""

    __author__ = 'Julio:map'
    title = u'Techtarget'
    publisher = 'Techtarget'
    category = 'IT, Infrastructure'
    description = '''IT Infrastructure related blogs
from Techtarget'''
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    needs_subscription = True
    LOGIN = u'http://searchservervirtualization.techtarget.com/login'

    no_stylesheets = True
    auto_cleanup = False
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
        dict(name='div', attrs={'class': 'entry'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': ['articleToolbar', 'relatedContent']}),
    ]
    remove_tags_after = [dict(name='div', attrs={'id': 'relatedContent'})]

    # NOTE(review): the last entry repeats the URL of 'Enterprise IT news
    # roundup' under a different title - confirm whether that is intended.
    feeds = [
        (u'IT news and analysis for CIOs',
         u'http://feeds.pheedo.com/SearchCIOITNewsAndAnalysisForCIOs'),
        (u'TotalCIO', u'http://feeds.pheedo.com/1532.xml'),
        (u'SearchCIO-Midmarket: Technology news and tips for midmarket CIOs',
         u'http://feeds.pheedo.com/techtarget/Searchsmb/Smbs'),
        (u'Compliance news and advice for senior IT and business managers',
         u'http://feeds.pheedo.com/tt/1200'),
        (u'Server virtualization news and opinions',
         u'http://feeds.pheedo.com/SearchservervirtualizationServerVirtualizationNewsAndOpinions'),
        (u'The Virtualization Room', u'http://feeds.pheedo.com/techtarget/nzLe'),
        (u'Server virtualization technical tips and expert advice',
         u'http://feeds.pheedo.com/SearchservervirtualizationServerVirtualizationTechnicalTipsAndExpertAdvice'),
        (u'Cloud Computing news and Technical Advice',
         u'http://feeds.pheedo.com/1260'),
        (u'IT infrastructure news',
         u'http://feeds.pheedo.com/techtarget/Searchdatacenter/ItInfrastructure'),
        (u'Storage Channel Update',
         u'http://feeds.pheedo.com/ChannelMarker-TheItChannelWeblog'),
        (u'VMware Tips and News',
         u'http://feeds.pheedo.com/SearchvmwarecomVmwareTipsAndTricks'),
        (u'Enterprise IT news roundup',
         u'http://feeds.pheedo.com/WhatisEnterpriseItNewsRoundup'),
        (u'WhatIs: Enterprise IT tips and expert advice',
         u'http://feeds.pheedo.com/WhatisEnterpriseItTipsAndExpertAdvice'),
        (u'WhatIs: Enterprise IT news roundup',
         u'http://feeds.pheedo.com/WhatisEnterpriseItNewsRoundup'),
    ]

    def get_browser(self):
        """Return a browser, logged in when a username is configured.

        The login page's second form (``nr=1``) is filled with the username
        as ``email`` and, if set, the password, and then submitted.
        """
        browser = BasicNewsRecipe.get_browser(self)
        if self.username is None:
            return browser
        browser.open(self.LOGIN)
        browser.select_form(nr=1)
        browser['email'] = self.username
        if self.password is not None:
            browser['password'] = self.password
        browser.submit()
        return browser
|
@ -1,30 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blogs.tedneward.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class InteroperabilityHappens(BasicNewsRecipe):
    """Recipe for 'Interoperability Happens', Ted Neward's tech blog.

    Articles are taken straight from the feed (``use_embedded_content``).
    """

    __author__ = 'Darko Miletic'
    title = 'Interoperability Happens'
    description = 'Tech blog by Ted Neward'
    language = 'en'
    publication_type = 'blog'
    encoding = 'utf-8'

    feeds = [
        (u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss'),
    ]
    oldest_article = 15
    max_articles_per_feed = 100
    use_embedded_content = True

    no_stylesheets = True
    extra_css = '\nbody{font-family: Verdana,Arial,Helvetica,sans-serif}\n'

    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
        'comment': description,
        'tags': 'blog, technology, microsoft, programming, C#, Java',
        'publisher': 'Ted Neward',
        'language': language,
    }
|
@ -1,57 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- mode: python -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012-2016, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.telam.com.ar
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Telam(BasicNewsRecipe):
    """Recipe for Telam, the Argentine national news agency."""

    title = 'Telam'
    __author__ = 'Darko Miletic'
    description = 'AGENCIA DE NOTICIAS DE LA REPUBLICA ARGENTINA'
    publisher = 'Telam S.E.'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'windows-1252'
    use_embedded_content = False
    language = 'es_AR'
    remove_empty_feeds = True
    auto_cleanup = True
    publication_type = 'newsportal'
    PREFIX = 'http://www.telam.com.ar'  # prepended to site-relative article links
    masthead_url = 'http://www.telam.com.ar/assets/img/logo.svg'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [
        (u'Ultimas noticias', u'http://www.telam.com.ar/rss2/ultimasnoticias.xml'),
        (u'Politica', u'http://www.telam.com.ar/rss2/politica.xml'),
        (u'Economia', u'http://www.telam.com.ar/rss2/economia.xml'),
        (u'Sociedad', u'http://www.telam.com.ar/rss2/sociedad.xml'),
        (u'Policiales', u'http://www.telam.com.ar/rss2/policiales.xml'),
        (u'Internacionales', u'http://www.telam.com.ar/rss2/internacional.xml'),
        (u'Espectaculos', u'http://www.telam.com.ar/rss2/espectaculos.xml'),
        (u'Cultura', u'http://www.telam.com.ar/rss2/cultura.xml'),
        (u'Deportes', u'http://www.telam.com.ar/rss2/deportes.xml'),
        (u'Educacion', u'http://www.telam.com.ar/rss2/educacion.xml')
    ]

    def get_article_url(self, article):
        """Return the article URL, made absolute when it is site-relative.

        The URL comes from the base-class implementation. A value starting
        with ``/`` gets ``PREFIX`` prepended. A missing URL (``None`` or
        empty) is passed through unchanged instead of raising
        ``AttributeError`` on ``.startswith``.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if url and url.startswith('/'):
            return self.PREFIX + url
        return url
|
@ -1,44 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||
|
||||
''' http://www.heise.de/tp - Telepolis (articles) '''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TelepolisArtikel(BasicNewsRecipe):
    """Recipe for Telepolis articles (heise.de/tp), using the print view."""

    title = u'Telepolis (Artikel)'
    __author__ = 'Gerhard Aigner'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    category = 'news'
    description = 'Telepolis Artikel'
    language = 'de_AT'
    oldest_article = 7
    max_articles_per_feed = 100
    recursion = 0
    no_stylesheets = True

    use_embedded_content = False
    remove_empty_feeds = True

    remove_tags_before = dict(name='h1')
    remove_tags = [dict(name='img')]

    feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]

    # Unwrap every hyperlink: opening and closing <a> tags are deleted, the
    # link text stays.
    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL | re.IGNORECASE), lambda match: ''),
                          (re.compile(r'</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), ]

    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher]

    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    def print_version(self, url):
        """Return the print-view URL for an article.

        The first run of five digits in ``url`` is used as the article
        number. When ``url`` contains no such number it is returned
        unchanged, rather than failing with ``AttributeError`` on a missing
        match.
        """
        m = re.search(r'\d{5}', url)
        if m is None:
            return url
        return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr=" + m.group() + "&mode=print"
|
@ -1,40 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
|
||||
title = u'Thai Post Daily'
|
||||
__author__ = 'Chotechai P.'
|
||||
language = 'th'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'
|
||||
feeds = [
|
||||
(u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'),
|
||||
(u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'),
|
||||
(u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'),
|
||||
(u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'),
|
||||
(u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'),
|
||||
(u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'),
|
||||
(u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'),
|
||||
(u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'),
|
||||
(u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'),
|
||||
(u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'),
|
||||
(u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'),
|
||||
(u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'),
|
||||
(u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'),
|
||||
(u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'),
|
||||
(u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'),
|
||||
(u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'),
|
||||
(u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'),
|
||||
(u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'),
|
||||
(u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'), # noqa
|
||||
(u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'),
|
||||
(u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed')]
|
||||
|
||||
def print_version(self, url):
    """Return the print-page URL for the article at *url*.

    Whatever follows the first 32 characters of *url* is appended to the
    site's ``/print/`` prefix.
    """
    article_path = url[32:]
    return 'http://www.thaipost.net/print/' + article_path
|
||||
|
||||
# Drop the logo, site-name and breadcrumb containers from the print page.
remove_tags = [
    {'name': 'div', 'attrs': {'class': 'print-logo'}},
    {'name': 'div', 'attrs': {'class': 'print-site_name'}},
    {'name': 'div', 'attrs': {'class': 'print-breadcrumb'}},
]
|
@ -1,46 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
|
||||
'''
|
||||
www.thedailynewsegypt.com
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheDailyNewsEG(BasicNewsRecipe):
    """Recipe for The Daily News Egypt, driven by the site's all-sections RSS feed."""

    # --- Publication metadata -------------------------------------------
    title = u'The Daily News Egypt'
    __author__ = 'Omm Mishmishah'
    description = 'News from Egypt'
    masthead_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
    cover_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'

    # --- Download settings ----------------------------------------------
    auto_cleanup = True
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'The Daily News Egypt'
    category = 'News, Egypt, World'
    language = 'en_EG'
    publication_type = 'newsportal'

    # Remove annoying map links. The inline-caption class is also used for
    # some image captions, hence the regex is anchored on maps.google.
    preprocess_regexps = [
        (
            re.compile(
                r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>',
                re.DOTALL,
            ),
            lambda m: '',
        ),
    ]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }

    # --- Markup clean-up rules ------------------------------------------
    keep_only_tags = [{'attrs': {'class': ['article section']}}]

    remove_tags = [
        {
            'attrs': {
                'class': [
                    'related',
                    'tags',
                    'tools',
                    'attached-content ready',
                    'inline-content story left',
                    'inline-content map left contracted',
                    'published',
                    'story-map',
                    'statepromo',
                    'topics',
                ]
            }
        }
    ]

    remove_attributes = ['width', 'height']

    feeds = [
        (u'The Daily News Egypt', u'http://www.thedailynewsegypt.com/rss.php?sectionid=all'),
    ]
|
@ -1,63 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini and Tom Surace'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>, 2013 Tom Surace <tekhedd@byteheaven.net>'
|
||||
description = 'The Escapist Magazine - v1.3 (2013, April 2013)'
|
||||
|
||||
#
|
||||
# Based on 'the Escapist Magazine - v1.02 (09, January 2010)'
|
||||
|
||||
'''
|
||||
http://www.escapistmagazine.com/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class al(BasicNewsRecipe):
    """Recipe for The Escapist Magazine: news and article feeds, read via print pages."""

    # NOTE(review): plain ``author`` rather than ``__author__`` -- confirm intended.
    author = 'Lorenzo Vigentini and Tom Surace'
    description = 'The Escapist Magazine'
    cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
    title = u'The Escapist Magazine'
    publisher = 'Themis Media'
    category = 'Video games news, lifestyle, gaming culture'

    language = 'en'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    # NOTE(review): spelled ``recursion`` (singular) -- confirm this is the
    # attribute name the base class actually reads.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Daily News', u'http://www.escapistmagazine.com/rss/news/0.xml'),
        (u'Articles', u'http://www.escapistmagazine.com/rss/articles/0.xml'),
    ]

    def print_version(self, url):
        """Translate a feed article URL into the matching print URL.

        The article URL is expected in the form
        ``http://www.escapistmagazine.com/news/view/123198-article-name?utm_source=rss&utm_medium=rss&utm_campaign=news``
        and is mapped to ``http://www.escapistmagazine.com/<section>/print/<number>``,
        where ``<section>`` is the first path segment and ``<number>`` is the
        part of the last path segment that precedes its first hyphen.
        """
        parts = url.split('/')
        section = parts[3]
        # The article number is the "number" that starts the article name.
        article_number = parts[-1].split('-')[0]
        return 'http://www.escapistmagazine.com/{0}/print/{1}'.format(section, article_number)

    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'article'}},
    ]

    remove_tags = [
        {
            'name': 'div',
            'attrs': {'id': ['ad_leaderboard', 'print_notice', 'bottom_panel_container']},
        },
    ]
|
@ -1,12 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1365777047(BasicNewsRecipe):
    """Recipe for The Feature, built from its single 'Latest' RSS feed."""

    # Identification
    title = u'The Feature'
    __author__ = 'Jose Pinto'
    language = 'en'

    # Download settings
    oldest_article = 30
    max_articles_per_feed = 100
    auto_cleanup = True
    use_embedded_content = False

    feeds = [
        (u'Latest', u'http://thefeature.net/rss/links'),
    ]
|
@ -1,74 +0,0 @@
|
||||
|
||||
'''
|
||||
www.philstar.com
|
||||
'''
|
||||
|
||||
import time
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Freeman(BasicNewsRecipe):
    """Recipe for The Freeman, fetched from the philstar.com RSS feeds.

    Articles are downloaded from their printer-friendly pages, and each
    article's title is re-read from that page.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Freeman'
    # Title passed to the converter; stamped once, when the class is defined.
    custom_title = "The Freeman - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = (
        'The Freeman is a daily English-language newspaper published in Cebu, Philippines, '
        'by the Philippine Star. It was the first newspaper in Cebu, first published in May 1919. '
        'The motto of the newspaper is "The fair and fearless" - philstar.com is a Philippine news '
        'and entertainment portal for the Filipino global community. It is the online presence of '
        'the STAR Group of Publications, a leading publisher of newspapers and magazines in the '
        'Philippines.'
    )
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Freeman.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Markup clean-up rules ------------------------------------------
    remove_tags = [
        # Logo
        dict(name='img', attrs={'id': 'Image1'}),
        # Section (Headlines, Nation, Metro, ...)
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),
        # Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),
        # View Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),
        # Zoom
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        ('Cebu News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=107'),
        ('Freeman Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=109'),
        ('Metro Cebu', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=531'),
        ('Region', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=530'),
        ('Cebu Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=108'),
        ('Cebu Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=110'),
        ('Cebu Lifestyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=111'),
        ('Cebu Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=51'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for *url*.

        Every ``/Article`` occurrence in the URL is rewritten to
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header label of the downloaded page.

        The title is obtained from the printer-friendly version of the
        article to avoid add_toc_thumbnail changing the title when the
        article has an image.

        If the header ``<span>`` is absent or has no children, the title
        already stored on *article* is kept instead of raising.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # Guard: find() yields None when the page lacks the header label.
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()
|
@ -1,80 +0,0 @@
|
||||
import time
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheManilaBulletin(BasicNewsRecipe):
    """Recipe for The Manila Bulletin, built from the mb.com.ph section feeds."""

    # --- Publication metadata -------------------------------------------
    title = u'The Manila Bulletin'
    # Title passed to the converter; stamped once, when the class is defined.
    custom_title = "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = (
        "The Manila Bulletin, (also known as the Bulletin and previously known as the Manila "
        "Daily Bulletin and the Bulletin Today) is the Philippines' largest broadsheet newspaper "
        "by circulation."
    )
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # --- Markup clean-up rules ------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article node'}},
        {'name': 'div', 'attrs': {'class': 'label'}},
        {'name': 'div', 'attrs': {'class': 'content clear-block'}},
    ]

    remove_tags = [
        {'name': 'li', 'attrs': {'class': 'print_html'}},
        {'name': 'li', 'attrs': {'class': 'print_html first'}},
        {'name': 'li', 'attrs': {'class': 'print_mail'}},
        {'name': 'li', 'attrs': {'class': 'print_mail last'}},
        {'name': 'div', 'attrs': {'class': 'article-sidebar'}},
        {'name': 'table', 'attrs': {'id': 'attachments'}},
    ]

    auto_cleanup = False

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
    ]
|
||||
|
||||
|
||||
# if use print version - convert url
|
||||
# http://www.mb.com.ph/articles/361252/higher-power-rate-looms
|
||||
# http://www.mb.com.ph/print/361252
|
||||
#
|
||||
# def print_version(self,url):
|
||||
# segments = url.split('/')
|
||||
# printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
|
||||
# return printURL
|
@ -1,68 +0,0 @@
|
||||
import time
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheManilaTimes(BasicNewsRecipe):
    """Recipe for The Manila Times, built from the manilatimes.net section feeds."""

    # --- Publication metadata -------------------------------------------
    title = u'The Manila Times'
    # Title passed to the converter; stamped once, when the class is defined.
    custom_title = "The Manila Times - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = 'The Manila Times is the oldest existing English language newspaper in the Philippines.'
    language = 'en_PH'
    publisher = 'The Manila Times'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    masthead_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'

    # --- Download settings ----------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # --- Markup clean-up rules ------------------------------------------
    remove_tags = [
        {'name': 'img', 'attrs': {'alt': 'Print'}},
        {'name': 'img', 'attrs': {'alt': 'Email:'}},
        {'name': 'dd', 'attrs': {'class': 'hits'}},
    ]

    auto_cleanup = True

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        (u'Breaking News', u'http://www.manilatimes.net/index.php/news/breaking-news?format=feed&type=rss'),
        (u'Top Stories', u'http://www.manilatimes.net/index.php/news/top-stories?format=feed&type=rss'),
        (u'Headlines', u'http://www.manilatimes.net/index.php/news/headlines-mt?format=feed&type=rss'),
        (u'Nation', u'http://www.manilatimes.net/index.php/news/nation?format=feed&type=rss'),
        (u'Regions', u'http://www.manilatimes.net/index.php/news/regions?format=feed&type=rss'),
        (u'World', u'http://www.manilatimes.net/index.php/news/world?format=feed&type=rss'),
        (u'Top Business News', u'http://www.manilatimes.net/index.php/business/top-business-news?format=feed&type=rss'),
        (u'Business Columnist', u'http://www.manilatimes.net/index.php/business/business-columnist?format=feed&type=rss'),
        (u'Opinion - Editorials', u'http://www.manilatimes.net/index.php/opinion/editorials?format=feed&type=rss'),
        (u'Opinion - Columnist', u'http://www.manilatimes.net/index.php/opinion/columnist1?format=feed&type=rss'),
        (u'Opinion - Editorial Cartoon', u'http://www.manilatimes.net/index.php/opinion/editorial-cartoon?format=feed&type=rss'),
        (u'Top Sports News', u'http://www.manilatimes.net/index.php/sports/top-sports-news?format=feed&type=rss'),
        (u'Sports Columnist', u'http://www.manilatimes.net/index.php/sports/sports-columnist?format=feed&type=rss'),
        (u'Life & Times', u'http://www.manilatimes.net/index.php/life-and-times?format=feed&type=rss'),
        (u'Showtime', u'http://www.manilatimes.net/index.php/life-and-times/showtime?format=feed&type=rss'),
        (u'Sunday Times', u'http://www.manilatimes.net/index.php/sunday-times?format=feed&type=rss'),
        (u'Sunday Times Magazine', u'http://www.manilatimes.net/index.php/sunday-times/the-sunday-times-magazines?format=feed&type=rss'),
        (u'Motoring News', u'http://www.manilatimes.net/index.php/fast-times/motoring-news?format=feed&type=rss'),
        (u'Motoring Columnist', u'http://www.manilatimes.net/index.php/fast-times/motoring-columnist?format=feed&type=rss'),
        (u'Technology', u'http://www.manilatimes.net/index.php/technology?format=feed&type=rss'),
    ]
|
@ -1,50 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.thenewage.co.za
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheNewAge_za(BasicNewsRecipe):
    """Recipe for The New Age (South Africa), built from the site's category feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'The New Age'
    __author__ = 'Darko Miletic'
    description = (
        "The New Age newspaper is a national daily newspaper, owned and operated by TNA Media (Pty) Ltd. "
        "TNA Media was established in June 2010 and the first publication of The New Age was on "
        "6 December 2010. The New Age covers news from all nine provinces, along with national events, "
        "Op-Ed columns, politics, Africa and International news, sports, business, entertainment, "
        "lifestyle, science and technology."
    )
    publisher = 'TNA Media (Pty.) Ltd.'
    category = 'news, politics, South Africa'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    auto_cleanup = False
    language = 'en_ZA'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.thenewage.co.za/image/tnalogo.png'
    extra_css = """
        body{font-family: Arial,Verdana,sans-serif }
        img{display: block}
        .storyheadline{font-size: x-large; font-weight: bold}
    """

    # The description is passed under the 'comments' key, the spelling used by
    # the other recipes; the previous 'comment' key did not match it.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Markup clean-up rules ------------------------------------------
    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'table', 'meta', 'link']),
    ]
    keep_only_tags = [
        dict(name='div', attrs={'id': ['dv_headline', 'dv_story_dtls']}),
    ]

    feeds = [
        (u'National', u'http://www.thenewage.co.za/rss.aspx?cat_id=1007'),
        (u'Provinces', u'http://www.thenewage.co.za/rss.aspx?cat_id=1008'),
        (u'Business', u'http://www.thenewage.co.za/rss.aspx?cat_id=9'),
        (u'Sport', u'http://www.thenewage.co.za/rss.aspx?cat_id=10'),
        (u'World', u'http://www.thenewage.co.za/rss.aspx?cat_id=1020'),
        (u'Africa', u'http://www.thenewage.co.za/rss.aspx?cat_id=1019'),
        (u'Science&Tech', u'http://www.thenewage.co.za/rss.aspx?cat_id=1021'),
    ]
|
@ -1,102 +0,0 @@
|
||||
|
||||
'''
|
||||
www.philstar.com
|
||||
'''
|
||||
|
||||
import time
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class PhilippineStar(BasicNewsRecipe):
    """Recipe for The Philippine Star, fetched from the philstar.com RSS feeds.

    Articles are downloaded from their printer-friendly pages, and each
    article's title is re-read from that page.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Philippine Star'
    # Title passed to the converter; stamped once, when the class is defined.
    custom_title = "The Philippine Star - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = (
        'The Philippine Star is a daily English-language broadsheet newspaper based in Manila. '
        'It has the most subscribers of any newspaper in the Philippines - philstar.com is a '
        'Philippine news and entertainment portal for the Filipino global community. It is the '
        'online presence of the STAR Group of Publications, a leading publisher of newspapers '
        'and magazines in the Philippines.'
    )
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    masthead_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 1  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Markup clean-up rules ------------------------------------------
    remove_tags = [
        # Logo
        dict(name='img', attrs={'id': 'Image1'}),
        # Section (Headlines, Nation, Metro, ...)
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),
        # Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),
        # View Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),
        # Zoom
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        ('Headlines', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=63'),
        ('Breaking News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=200'),
        ('News Feature', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=68'),
        ('Nation', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=67'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=65'),
        ('Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=66'),
        ('Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=69'),
        ('Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=70'),
        ('Science & Technology', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=75'),
        ('Networks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=71'),
        ('Business as Usual', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=78'),
        ('Banking', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=74'),
        ('Motoring', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=72'),
        ('Real Estate', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=76'),
        ('Telecoms', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=73'),
        ('Agriculture', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=77'),
        ('Arts & Culture', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=79'),
        ('Food & Leisure', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=81'),
        ('Health & Family', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=80'),
        ('Education & Home', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=442'),
        ('Travel & Tourism', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=87'),
        ('Newsmakers', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=88'),
        ('Business Life', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=82'),
        ('Fashion & Beauty', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=83'),
        ('For Men', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=446'),
        ('Gadgets', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=449'),
        ('Sunday Life', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=86'),
        ('Supreme', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=448'),
        ('Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=64'),
        ('Letters to the Editor', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=135'),
        ('Starweek Magazine', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=90'),
        ('Modern Living', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=85'),
        ('YStyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=451'),
        ('Allure', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=89'),
        ('Weather', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=116'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for *url*.

        Every ``/Article`` occurrence in the URL is rewritten to
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header label of the downloaded page.

        The title is obtained from the printer-friendly version of the
        article to avoid add_toc_thumbnail changing the title when the
        article has an image.

        If the header ``<span>`` is absent or has no children, the title
        already stored on *article* is kept instead of raising.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # Guard: find() yields None when the page lacks the header label.
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()
|
@ -1,46 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.scotsman.com/the-scotsman
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheScotsman(BasicNewsRecipe):
    """Recipe for The Scotsman, built from the scotsman.com section feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'The Scotsman'
    __author__ = 'Darko Miletic'
    description = 'News from Scotland'
    publisher = 'Johnston Publishing Ltd.'
    category = 'news, politics, Scotland, UK'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en_GB'
    encoding = 'utf-8'
    publication_type = 'newspaper'
    remove_empty_feeds = True
    masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png'
    extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'

    # --- Markup clean-up rules ------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]
    remove_attributes = ['lang']

    # The description is passed under the 'comments' key, the spelling used by
    # the other recipes; the previous 'comment' key did not match it.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [
        ('Latest News', 'http://www.scotsman.com/cmlink/1.957140'),
        ('UK', 'http://www.scotsman.com/cmlink/1.957142'),
        ('Scotland', 'http://www.scotsman.com/cmlink/1.957141'),
        ('International', 'http://www.scotsman.com/cmlink/1.957143'),
        ('Politics', 'http://www.scotsman.com/cmlink/1.957044'),
        ('Arts', 'http://www.scotsman.com/cmlink/1.1804825'),
        ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053'),
        ('Sports', 'http://www.scotsman.com/cmlink/1.957151'),
        ('Business', 'http://www.scotsman.com/cmlink/1.957156'),
        ('Features', 'http://www.scotsman.com/cmlink/1.957149'),
        ('Opinion', 'http://www.scotsman.com/cmlink/1.957054'),
    ]
|
@ -1,57 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.getwokingham.co.uk
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheWokinghamTimes(BasicNewsRecipe):
    """Recipe for The Wokingham Times, built from the getwokingham.co.uk feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'The Wokingham Times'
    __author__ = 'Darko Miletic'
    description = 'News from UK'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'The Wokingham Times - S&B media'
    category = 'news, UK, world'
    language = 'en_GB'
    publication_type = 'newsportal'
    extra_css = """
        body{ font-family: Arial,sans-serif }
        img{display: block; margin-bottom: 0.4em}
    """

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Markup clean-up rules ------------------------------------------
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'article-body'}}]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['ad']}},
        {'name': ['meta', 'base', 'iframe', 'embed', 'object']},
        {'name': 'span', 'attrs': {'class': 'caption small'}},
    ]
    remove_attributes = ['width', 'height', 'lang']

    feeds = [
        ('Home', 'http://www.getwokingham.co.uk/rss.xml'),
        ('News', 'http://www.getwokingham.co.uk/news/rss.xml'),
        ('Entertainment', 'http://www.getwokingham.co.uk/entertainment/rss.xml'),
        ('Lifestyle', 'http://www.getwokingham.co.uk/lifestyle/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles and neutralise hyperlinks, then return *soup*.

        Every element carrying a ``style`` attribute loses it. Each ``<a>``
        whose ``.string`` is set is replaced by that string; any other ``<a>``
        is renamed to ``<span>`` and its ``href`` attribute is deleted.
        """
        for styled in soup.findAll(style=True):
            del styled['style']

        for link in soup.findAll('a'):
            link_text = link.string
            if link_text is None:
                # No single string child: keep the element, drop the link.
                link.name = 'span'
                del link['href']
            else:
                link.replaceWith(link_text)
        return soup
|
@ -1,183 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from datetime import date
|
||||
|
||||
from lxml import etree
|
||||
|
||||
__copyright__ = '2015, April King <april@twoevils.org>'
|
||||
__license__ = 'GPL v3'
|
||||
__version__ = '1.2'
|
||||
|
||||
'''
|
||||
http://www.thecodelesscode.com/
|
||||
'''
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class CodelessCode(BasicNewsRecipe):
|
||||
__author__ = 'April King'
|
||||
title = u'The Codeless Code'
|
||||
category = 'fiction, programming, technology'
|
||||
chapters = {} # ie, Mousetrap -> 182
|
||||
compress_news_images = True
|
||||
compress_news_images_max_size = 100
|
||||
cover_url = 'http://www.thecodelesscode.com/pages/case-9/Lotus-050.jpg'
|
||||
credits = [u'<h2 class="chapter_title">{0}</h2>'.format(title),
|
||||
u'<p>By <em>Qi</em></p>',
|
||||
u'<p>An illustrated collection of (sometimes violent) fables concerning the Art and Philosophy of software development, written in the spirit of Zen kōans</p>', # noqa
|
||||
u'<p>eBook conversion courtesy of <em>{0}</em></p>'.format(__author__)]
|
||||
description = u'The Art and Philosophy of software development, written in the spirit of Zen kōans'
|
||||
extra_css = '.article_date { display: none; float: right; } \
|
||||
.chapter_title { font-size: 1.75em; margin-top: 0; } \
|
||||
.chapter_title::first-letter { font-size: 1.35em; font-weight: 500; letter-spacing: -.05em; } \
|
||||
h2 { margin-top: 0; } \
|
||||
.image_wrapper { text-align: center; }'
|
||||
index = 'http://www.thecodelesscode.com/contents'
|
||||
language = 'en'
|
||||
max_articles_per_feed = 1000 # I can only wish
|
||||
path_remappings = {} # IE, /case/182 -> articles_72/index.html
|
||||
publication_type = 'blog'
|
||||
publisher = 'Qi'
|
||||
resolve_internal_links = True
|
||||
scale_news_images = (600, 400)
|
||||
simultaneous_downloads = 1
|
||||
url = 'http://www.thecodelesscode.com'
|
||||
|
||||
def parse_index(self):
    """Build the article index from the table-of-contents page.

    Each table row with a usable link becomes one article dict. Rows whose
    link contains ``random`` and titles containing ``The Applicant`` are
    skipped. As a side effect ``self.chapters`` maps each kept title to the
    last path segment of its URL.

    Returns a one-element list pairing ``self.title`` with the article
    dicts, reversed from page order.
    """
    # Retrieve the contents page, containing the ToC
    toc = self.index_to_soup(self.index)

    koans = []
    for row in toc.findAll('tr'):
        # BS has some trouble with the weird layout
        link = row.find('a')
        if link is None or 'random' in link['href']:
            continue

        # Minor coding error causes calibre to glitch; use the current date
        # for the most recent title
        date_cell = row.find('td', attrs={'class': 'toc-date'})
        if date_cell is not None:
            koan_date = date_cell.string
        else:
            koan_date = date.today().isoformat()

        title = link.string
        url = self.url + link['href']

        # Only the main story
        if u'The Applicant' in title:
            continue

        koans.append({
            'content': '',
            'date': koan_date,
            'description': '',
            'title': title,
            'url': url,
        })

        # ie, Mousetrap -> 182
        self.chapters[title] = url.rsplit('/', 1)[-1]

    # Oldest koans first
    koans.reverse()

    # Log and then get out of here
    self.log("Found {0} koans".format(len(koans)))
    return [(self.title, koans)]
|
||||
|
||||
def preprocess_html(self, soup):
    """Reduce a downloaded koan page to its story and tidy the markup.

    Returns a new soup made of the page title (wrapped in a
    ``chapter_title`` div when the title is a known chapter) followed by
    the children of the ``story koan`` div.  Links are then processed:
    ``/case/`` links are kept for later remapping in postprocess_book(),
    links whose href contains ``note`` are removed, and every other anchor
    is replaced by its own contents.  Top-level images are wrapped in an
    ``image_wrapper`` div.
    """
    title = soup.find('h1', attrs={'class': 'title'}).find(
        'a', attrs={'class': 'subtle'}).string

    # Add a title at the beginning of each chapter
    if title in self.chapters:
        title = '<div class="chapter_title">{0}</div>'.format(title)

    # Load up the actual story
    story = soup.find('div', attrs={'class': 'story koan'})

    # Kind of a hack-y way to get .children in BS3 <a><b><c></c></b></a>
    # -> <b><c></c></b>
    story_nodes = list(story.contents)
    koan = bs(title)

    for node in reversed(story_nodes):
        koan.insert(1, node)

    # Remove all anchors that don't contain /case/, leaving them as just their text
    # Note that we'll come back and clean up /case/ links when the URLs are remapped
    # during postprocess_book()
    for anchor in koan.findAll('a'):
        # Anchors without an href used to raise KeyError here; treat them
        # like any other non-/case/ link
        href = anchor.get('href') or ''

        if '/case/' in href:
            continue

        if 'note' in href:
            anchor.replaceWith('')
            continue

        # Again, a hacky way to get the contents of the tag, thanks
        # to BS3
        link_nodes = list(anchor.contents)
        linktext = bs()
        for node in reversed(link_nodes):
            linktext.insert(1, node)
        anchor.replaceWith(linktext)

    # Find all the images, and wrap them up in an image_wrapper div.
    # Index-based on purpose: moving the image into the wrapper and then
    # inserting the wrapper at the same index keeps the length stable.
    for i in range(len(koan.contents)):
        node = koan.contents[i]
        if not hasattr(node, 'name'):
            continue  # skip carriage returns
        if node.name == u'img':
            div = bs('<div class="image_wrapper"></div>')
            div.div.insert(0, node)
            koan.insert(i, div)

    return koan
|
||||
|
||||
def canonicalize_internal_url(self, url, is_link=True):
    """Drop everything up to and including ``self.url`` from *url*, then
    delegate to BasicNewsRecipe.canonicalize_internal_url().

    A URL that does not contain ``self.url`` is passed through unchanged.
    """
    pieces = url.split(self.url)
    site_relative = pieces[-1]
    return BasicNewsRecipe.canonicalize_internal_url(
        self, site_relative, is_link=is_link)
|
||||
|
||||
def postprocess_book(self, oeb, opts, log):
    """Patch the assembled book: remap hrefs, rebuild the index page and
    rewrite the book metadata.

    * Every ``href`` found in a ``text/html`` manifest item that is a key
      of ``self.path_remappings`` is replaced by ``'../' + <mapped path>``.
    * In ``index.html`` all ``ul`` and ``p`` elements are removed and the
      entries of ``self.credits`` are inserted, in order, at the start of
      each ``div``.
    * creator/publisher are set to ``self.publisher``, title and
      publication_type to ``self.title``, and the description is cut at
      the "Articles in this issue" marker.
    """
    # Go through each internal representation of each HTML file, and fix
    # all the broken hrefs, if possible
    for entry in oeb.manifest.items:
        if entry.media_type != 'text/html':
            continue

        for node in entry.data.xpath('//*[@href]'):
            naughty_href = node.get('href')
            if naughty_href not in self.path_remappings:
                continue

            node.set('href', '../' + self.path_remappings[naughty_href])
            href = node.get('href')
            self.log(
                "Remapped href {0} --> {1}".format(naughty_href, href))

    # Remove the superfluous extra feed page at the beginning of the book, replacing it
    # with the proper credits
    index_page = oeb.manifest.hrefs['index.html']

    for unwanted in index_page.data.xpath('//*[local-name()="ul"]'):
        unwanted.getparent().remove(unwanted)

    for unwanted in index_page.data.xpath('//*[local-name()="p"]'):
        unwanted.getparent().remove(unwanted)

    for container in index_page.data.xpath('//*[local-name()="div"]'):
        # Inserting at position 0 in reverse order leaves the credits in
        # their original order
        for credit in reversed(self.credits):
            parser = etree.XMLParser(
                recover=True, no_network=True, resolve_entities=False)
            container.insert(0, etree.fromstring(credit, parser=parser))

    # Change the creator from "calibre" to the actual author
    # Also, we don't need the date in the ebook's title
    meta = oeb.metadata.items
    meta['creator'][0].value = self.publisher
    meta['description'][0].value = meta['description'][0].value.split(
        '\n\nArticles in this issue')[0]
    meta['publication_type'][0].value = self.title
    meta['publisher'][0].value = self.publisher
    meta['title'][0].value = self.title
|
@ -1,52 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.livemint.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Edgesingapore(BasicNewsRecipe):
    """Recipe for The Edge Singapore, fed from a single FeedBurner feed.

    Articles are fetched in their print layout (see print_version) and all
    table markup is flattened into plain divs before conversion.
    """
    title = 'The Edge Singapore'
    __author__ = 'Darko Miletic'
    description = 'Financial news from Singapore'
    publisher = 'The Edge Singapore'
    category = 'news, finances, singapore'
    language = 'en'

    lang = 'en_SG'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    extra_css = ' .contentheading{font-size: x-large} .small{font-size: small} .createdate{font-size: small; font-weight: bold} '

    conversion_options = {
        # The conversion option is named 'comments'; the former 'comment'
        # key matched no option, so the description was never applied.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        # NOTE(review): the option name is presumably 'authors'; left as-is
        # because switching it would change the author shown on the book.
        'author': publisher,
        'language': lang,
        'pretty_print': True,
        'linearize_tables': True,
    }

    remove_tags = [
        dict(name=['object', 'link', 'embed', 'form', 'iframe']),
        dict(name='div', attrs={'id': 'toolbar-article'}),
        dict(name='div', attrs={'class': 'backtotop'}),
        dict(name='img', attrs={'alt': 'Print'}),
    ]

    remove_tags_after = dict(name='div', attrs={'class': 'backtotop'})

    feeds = [(u'Articles', u'http://feeds.feedburner.com/edgesg')]

    def print_version(self, url):
        """Return *url* with the print-layout query string appended."""
        return url + '?tmpl=component&print=1'

    def preprocess_html(self, soup):
        """Turn every table-related tag into a ``div`` without layout
        attributes, then hand the soup to adeify_images()."""
        attribs = [
            'style', 'font', 'valign', 'colspan', 'width', 'height',
            'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding',
            'frames', 'rules', 'border',
        ]
        table_tags = ['table', 'td', 'tr', 'th', 'caption', 'thead',
                      'tfoot', 'tbody', 'colgroup', 'col']
        for item in soup.body.findAll(name=table_tags):
            item.name = 'div'
            for attrib in attribs:
                # Assign first so the delete cannot fail on a missing key
                item[attrib] = ''
                del item[attrib]
        return self.adeify_images(soup)
|
@ -1,34 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
luminous-landscape.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class theluminouslandscape(BasicNewsRecipe):
    """Recipe for The Luminous Landscape "What's new" feed.

    Article bodies come from the feed itself (use_embedded_content).
    """
    title = 'The Luminous Landscape'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'  # noqa
    publisher = 'The Luminous Landscape '
    category = 'news, blog, photograph, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    encoding = 'cp1252'
    language = 'en'

    conversion_options = {
        # The conversion option is named 'comments'; the former 'comment'
        # key matched no option, so the description was never applied.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [
        (u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php'),
    ]
    remove_tags = [dict(name=['object', 'link', 'iframe'])]

    def preprocess_html(self, soup):
        """Return the soup after passing it through adeify_images()."""
        return self.adeify_images(soup)
|
@ -1,24 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
market-ticker.denninger.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Themarketticker(BasicNewsRecipe):
    """Recipe for The Market Ticker; article text is taken from the feed."""
    title = 'The Market Ticker'
    __author__ = 'Darko Miletic'
    description = 'Commentary On The Capital Markets'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    use_embedded_content = True

    # Flag/value pairs, in command-line order
    html2lrf_options = [
        '--comment', description,
        '--category', 'blog,news,finances',
        '--base-font-size', '10',
    ]

    feeds = [
        (u'Posts', u'http://market-ticker.denninger.net/feeds/index.rss2'),
    ]
|
@ -1,42 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- mode: python -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2017, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blog.acolyer.org
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Themorningpaper(BasicNewsRecipe):
    """Recipe for The Morning Paper blog (blog.acolyer.org).

    Uses a single feed and relies on auto_cleanup to extract article text.
    """
    title = 'The Morning Paper'
    __author__ = 'Darko Miletic'
    description = ('an interesting/influential/important paper from'
                   ' the world of CS every weekday morning, as selected by Adrian Colyer')
    publisher = 'Adrian Colyer'
    category = 'news, tech'
    oldest_article = 180
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    auto_cleanup = True
    publication_type = 'blog'
    extra_css = """
body{font-family: Georgia,Palatino,serif }
img{margin-bottom: 0.4em; display:block}
"""

    conversion_options = {
        # The conversion option is named 'comments'; the former 'comment'
        # key matched no option, so the description was never applied.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    feeds = [(u'Articles', u'https://blog.acolyer.org/feed/')]
|
@ -1,88 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheNewsRecipe(BasicNewsRecipe):
    """Recipe for The News (Jang Group, Pakistan).

    Articles are downloaded through the site's print pages; feed entries
    whose URL has no known print page are dropped (see print_version).
    """
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1

    title = u'The News'
    publisher = u'Jang Group'
    category = u'News, Pakistan'
    description = u'English Newspaper from Pakistan'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'

    remove_tags = [
        dict(name='img', attrs={'src': 'images/thenews.gif'}),
        dict(name='img', attrs={'src': 'images/shim.gif'}),
    ]

    # Feeds from http://thenews.com.pk/rss.asp
    feeds = [
        (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'),
        (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'),
        (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'),
        (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'),
        (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'),
        (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'),
        (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'),
        (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'),
        (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'),
        (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'),
        (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'),
        (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'),
        (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'),
    ]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
        'publisher': publisher,
        'linearize_tables': True,
    }

    extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
.heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
.small_txt {text-align: left;}
.dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
'''

    def print_version(self, url):
        """Map an article URL to its print page.

        The text after the last '/' is matched against the known article
        page names; on a match every occurrence of that name in *url* is
        replaced by the print page name.  Returns None otherwise.
        """
        page = url.rpartition('/')[2]

        print_pages = (
            ('updates.asp', 'print.asp'),
            ('top_story_detail.asp', 'print3.asp'),
            ('daily_detail.asp', 'print1.asp'),
        )
        for article_page, print_page in print_pages:
            if page.startswith(article_page):
                return url.replace(article_page, print_page)
        return None

    def preprocess_html(self, soup):
        """Strip row background colours and restyle the dateline cell."""
        for row in soup.findAll('tr', attrs={'bgcolor': True}):
            del row['bgcolor']

        # The 20px-high small_txt cell becomes the 'dateline' styled by extra_css
        cell = soup.find('td', attrs={'class': 'small_txt', 'height': '20'})
        if cell:
            del cell['height']
            cell['class'] = 'dateline'

        return soup
|
@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.theoldfoodie.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheOldFoodie(BasicNewsRecipe):
    """Recipe for The Old Foodie blog; article text is taken from the feed."""
    title = 'The Old Foodie'
    __author__ = 'Darko Miletic'
    description = 'Food blog'
    category = 'cuisine, food, blog'
    language = 'en'
    encoding = 'utf-8'

    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = True
    no_stylesheets = True

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
    }

    feeds = [
        (u'Articles', u'http://www.theoldfoodie.com/feeds/posts/default?alt=rss'),
    ]
|
@ -1,89 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
theonion.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheOnion(BasicNewsRecipe):
    """Recipe for The Onion (daily and sports feeds).

    Login is optional: when a username and password are configured the
    browser submits them to the login form before downloading.  Audio
    entries are skipped, and links are flattened to plain text or, when
    they wrap an image, to a bare div.
    """
    title = 'The Onion'
    __author__ = 'Darko Miletic'
    description = "The Onion, America's Finest News Source, is an award-winning publication covering world, national, and * local issues. It is updated daily online and distributed weekly in select American cities."  # noqa
    oldest_article = 2
    max_articles_per_feed = 100
    publisher = 'Onion, Inc.'
    category = 'humor, news, USA'
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    publication_type = 'newsportal'
    needs_subscription = 'optional'
    masthead_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
    cover_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
    extra_css = """
body{font-family: Helvetica,Arial,sans-serif}
.section_title{color: gray; text-transform: uppercase}
.title{font-family: Georgia,serif}
.meta{color: gray; display: inline}
.has_caption{display: block}
.caption{font-size: x-small; color: gray; margin-bottom: 0.8em}
"""

    conversion_options = {
        # The conversion option is named 'comments'; the former 'comment'
        # key matched no option, so the description was never applied.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [
        dict(attrs={'class': lambda x: x and 'content-wrapper' in x.split()}),
    ]
    remove_attributes = ['lang', 'rel']
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'base', 'meta', 'button',
                   'footer', 'blockquote', 'figcaption']),
        dict(attrs={'class': lambda x: x and 'share-tools' in x.split()}),
        dict(attrs={'class': lambda x: x and 'content-meta' in x.split()}),
        dict(attrs={'class': 'below-article-tools'}),
        dict(name='div', attrs={'id': ['topshare', 'bottomshare']}),
    ]

    feeds = [
        (u'Daily', u'http://feeds.theonion.com/theonion/daily'),
        (u'Sports', u'http://feeds.theonion.com/theonion/sports'),
    ]

    def get_browser(self):
        """Return a browser that has opened the front page and, when
        credentials are configured, submitted the login form."""
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.theonion.com/')
        if self.username is not None and self.password is not None:
            br.open('https://ui.ppjol.com/login/onion/u/j_spring_security_check')
            br.select_form(name='f')
            br['j_username'] = self.username
            br['j_password'] = self.password
            br.submit()
        return br

    def get_article_url(self, article):
        """Return the article URL, or None for audio entries.

        A missing URL (None from the base implementation) is passed
        through instead of raising AttributeError on ``.startswith``.
        """
        artl = BasicNewsRecipe.get_article_url(self, article)
        if artl and artl.startswith('http://www.theonion.com/audio/'):
            return None
        return artl

    def preprocess_html(self, soup):
        """Strip inline styles, unwrap links and resolve lazy-loaded images.

        * Anchors with a single text child are replaced by that text.
        * Anchors containing an image become attribute-less divs, and the
          image gets a placeholder ``alt`` when it has none.
        * Any other anchor is replaced by its flattened text.
        * Images with a ``data-src`` attribute get it copied to ``src``.
        """
        for item in soup.findAll(style=True):
            del item['style']

        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                link_text = item.string
                item.replaceWith(link_text)
            elif limg:
                item.name = 'div'
                # attrs must stay a dict: bs4 calls .items() on it when
                # serialising, which fails on the former empty list
                item.attrs = {}
                if not limg.get('alt'):
                    limg['alt'] = 'image'
            else:
                link_text = self.tag_to_string(item)
                item.replaceWith(link_text)

        for item in soup.findAll('img'):
            if item.get('data-src'):
                item['src'] = item['data-src']
        return soup
|
@ -1,62 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
thewest.com.au
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class TheWest(BasicNewsRecipe):
    """Recipe for The West Australian, built from the Yahoo-hosted feeds.

    Everything between ``</title>`` and ``</head>`` is stripped from each
    page before parsing, and only the ``mod article`` element is kept.
    """
    title = 'The West Australian'
    __author__ = 'Darko Miletic'
    description = 'News from Australia'
    publisher = 'thewest.com.au'
    category = 'news, politics, Australia'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en_AU'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://l.yimg.com/ao/i/mp/properties/news/02/wan/img/wan-logo-h49.png'
    extra_css = ' .article{font-family: Arial,Helvetica,sans-serif } .image{font-size: x-small} '

    preprocess_regexps = [
        # Drop the rest of <head> after the title
        (re.compile(r'</title>.*?</head>', re.DOTALL | re.IGNORECASE),
         lambda match: '</title></head>'),
    ]

    conversion_options = {
        # The conversion option is named 'comments'; the former 'comment'
        # key matched no option, so the description was never applied.
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [
        dict(attrs={'class': ['tools', 'lhs']}),
        dict(attrs={'id': 'tools-bottom'}),
        dict(attrs={'href': 'http://twitter.com/thewest_com_au'}),
    ]
    keep_only_tags = [dict(attrs={'class': 'mod article'})]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'WA News', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/wa.xml'),
        (u'National', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/national.xml'),
        (u'World', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/world.xml'),
        (u'Offbeat', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/offbeat.xml'),
        (u'Business', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/business.xml'),
        (u'Sport', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/sport.xml'),
        (u'Entertainment', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/entertainment.xml'),
        (u'Travel', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/travel.xml'),
        (u'Life+Style', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/lifestyle.xml'),
    ]

    def get_article_url(self, article):
        """Return the feed entry's ``guid``, or None when it has none."""
        return article.get('guid', None)

    def preprocess_html(self, soup):
        """Return the soup after passing it through adeify_images()."""
        return self.adeify_images(soup)
|
@ -1,13 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1263409732(BasicNewsRecipe):
    """Recipe for the Think Progress news feed."""
    title = u'Think Progress'
    __author__ = u'Xanthan Gum'
    description = u'A compilation of progressive articles on social and economic justice, healthy communities, media accountability, global and domestic security.'  # noqa
    language = 'en'

    max_articles_per_feed = 100
    oldest_article = 7

    feeds = [
        (u'News Articles', u'http://thinkprogress.org/feed/'),
    ]
|