This commit is contained in:
Kovid Goyal 2024-11-04 18:42:33 +05:30
commit dfaaa56172
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
130 changed files with 149 additions and 3202 deletions

BIN
recipes/icons/sonar21.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 833 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 761 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 489 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 977 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 269 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 484 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 403 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 917 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 590 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 353 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 936 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 688 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 672 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 671 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 449 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 370 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 925 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 753 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 890 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 878 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 681 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 328 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 350 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 251 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 258 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 586 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 645 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 296 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 560 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 642 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 418 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 174 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 576 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 685 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 685 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 415 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 614 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 330 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 297 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 336 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 74 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 205 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 159 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 903 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 822 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 153 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 768 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 282 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 317 B

BIN
recipes/icons/unz.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 494 B

42
recipes/sonar21.recipe Normal file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Sonar21(BasicNewsRecipe):
    '''Download recent articles from sonar21.com via its RSS feed.'''
    title = 'Sonar21'
    __author__ = 'unkn0wn'
    oldest_article = 7  # days; user-overridable via the 'days' option below
    language = 'en_US'
    max_articles_per_feed = 100
    use_embedded_content = False
    masthead_url = 'https://sonar21.com/wp-content/uploads/2024/10/logo_999999_720x216.png'
    cover_url = 'https://sonar21.com/wp-content/uploads/2024/09/sonar21_backplate_vertical.jpg'
    encoding = 'utf-8'
    browser_type = 'webengine'  # NOTE(review): presumably the site needs a JS-capable fetcher - confirm
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    extra_css = '.entry-meta, .wp-element-caption, .wp-block-image { font-size: small; }'
    keep_only_tags = [classes('entry-header entry-content')]
    remove_tags = [
        dict(name=['iframe', 'svg']),
        classes('addtoany_share_save_container wpd-avatar'),
    ]

    # Options shown in calibre's advanced recipe-configuration UI.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }

    def __init__(self, *args, **kwargs):
        # Apply the user-supplied 'days' override, if any.
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    feeds = ['https://sonar21.com/feed']

View File

@ -6,6 +6,7 @@
# Copyright: Nathan Cook (nathan.cook@gmail.com)
##
# Written: 2020-12-18
# Updated: 2024-11-04
##
__license__ = 'GNU General Public License v3 https://www.gnu.org/licenses/gpl-3.0.html'
@ -14,6 +15,7 @@ __version__ = 'v0.1.1'
__date__ = '2020-12-19'
__author__ = 'topynate'
import re
import json
from calibre.web.feeds.news import BasicNewsRecipe
@ -21,21 +23,36 @@ from mechanize import Request
class Substack(BasicNewsRecipe):
title = 'Substack'
__author__ = 'topynate'
title = 'Substack'
__author__ = 'topynate, unkn0wn'
description = 'Use advanced menu if you want to add your own substack handles.'
oldest_article = 7
language = 'en'
max_articles_per_feed = 100
auto_cleanup = True
auto_cleanup = True
auto_cleanup_keep = '//*[@class="subtitle"]'
needs_subscription = 'optional'
use_embedded_content = False
masthead_url = 'https://substack.com/img/substack_wordmark.png'
cover_url = 'https://substack.com/img/substack.png'
extra_css = '.captioned-image-container, .image-container {font-size: small;}'
recipe_specific_options = {
'auths': {
'short': 'enter the @handles you subscribe to:\nseperated by a space',
'long': 'julianmacfarlane ianleslie .... ....',
'default': 'julianmacfarlane ianleslie thesalvo',
},
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
'default': str(oldest_article),
},
'res': {
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
'default': '600',
},
}
def __init__(self, *args, **kwargs):
@ -44,12 +61,12 @@ class Substack(BasicNewsRecipe):
if d and isinstance(d, str):
self.oldest_article = float(d)
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
# The same URL provides either all posts, or all free posts + previews of paid posts,
# depending on whether you're logged in.
feeds = [
('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
]
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
# The same URL provides either all posts, or all free posts + previews of paid posts,
# depending on whether you're logged in.
# feeds = [
# ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
# ]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
@ -70,3 +87,24 @@ class Substack(BasicNewsRecipe):
if res.getcode() != 200:
raise ValueError('Login failed, check username and password')
return br
def get_feeds(self):
ans = []
u = self.recipe_specific_options.get('auths')
if u and isinstance(u, str):
for x in u.split():
ans.append('https://' + x.replace('@', ' ') + '.substack.com/feed')
return ans
def preprocess_html(self, soup):
res = '600'
w = self.recipe_specific_options.get('res')
if w and isinstance(w, str):
res = w
for img in soup.findAll('img', attrs={'src': True}):
img['src'] = re.sub(r'w_\d+', 'w_' + res, img['src'])
for src in soup.findAll(['source', 'svg']):
src.extract()
for but in soup.findAll(attrs={'class': ['button-wrapper']}):
but.extract()
return soup

View File

@ -1,69 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a BeautifulSoup ``findAll`` keyword dict matching any tag whose
    class attribute shares at least one name with the space-separated
    *classes* string."""
    wanted = frozenset(classes.split(' '))

    def class_matches(value):
        # Falsy (None/empty) class attributes never match; otherwise return
        # the (possibly empty, hence falsy) set of shared class names.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': class_matches}}
class TN(BasicNewsRecipe):
    '''Taipei Times - follows numbered pagination links so multi-page
    articles are assembled into a single document.'''
    title = u'Taipei Times'
    language = 'en_TW'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True

    keep_only_tags = [
        dict(name='h1'),
        dict(name='h3', attrs={'class': 'a'}),
        classes('main_ipic reporter text page'),
    ]

    feeds = [
        ('Front Page', 'http://www.taipeitimes.com/xml/front.rss'),
        ('Editorials', 'http://www.taipeitimes.com/xml/editorials.rss'),
        ('Taiwan', 'http://www.taipeitimes.com/xml/taiwan.rss'),
        ('Features', 'http://www.taipeitimes.com/xml/feat.rss'),
        ('Business', 'http://www.taipeitimes.com/xml/biz.rss'),
        ('World', 'http://www.taipeitimes.com/xml/world.rss'),
        ('Sports', 'http://www.taipeitimes.com/xml/sport.rss'),
    ]

    def preprocess_html(self, soup, *a):
        # Mark links inside the pagination block so is_link_wanted() only
        # follows links tagged here, and absolutize site-relative hrefs.
        for div in soup.findAll(**classes('page')):
            for a in div.findAll('a', href=True):  # NOTE: shadows the *a parameter
                a['data-calibre-follow-link'] = '1'
                if a['href'].startswith('/'):
                    a['href'] = 'http://www.taipeitimes.com' + a['href']
        return soup

    recursions = 1  # follow tagged links one level deep (article pages 2+)

    def is_link_wanted(self, url, tag):
        # Follow only links whose URL ends in a page number > 1 and whose
        # link text is itself numeric (a page-number link).
        # NOTE(review): tag['data-calibre-follow-link'] raises KeyError for
        # links that were never tagged - presumably handled upstream; confirm.
        digit = re.search(r'/(\d+)$', url)
        if digit is not None and tag['data-calibre-follow-link'] == '1' and re.match(r'\d+', self.tag_to_string(tag)) is not None:
            if int(digit.group(1)) > 1:
                return True
        return False

    def postprocess_html(self, soup, *a):
        # Drop the pagination block itself from the final output.
        for div in soup.findAll(**classes('page')):
            div.extract()
        return soup

    # def parse_index(self):
    #     return [(
    #         'Articles', [{
    #             'title':
    #             'test',
    #             'url':
    #             'http://www.taipeitimes.com/News/editorials/archives/2019/02/26/2003710411'
    #         }]
    #     )]

View File

@ -1,32 +0,0 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class TaNea(BasicNewsRecipe):
    '''Ta Nea - Greek daily newspaper (tanea.gr), via its print pages.'''
    title = u'Ta Nea'
    __author__ = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'el'

    remove_tags_before = dict(name='div', attrs={'id': 'print-body'})
    remove_tags_after = dict(name='div', attrs={'id': 'text'})

    # NOTE(review): the feed titles below look like UTF-8 bytes stored as
    # individual codepoints (mojibake), e.g. u'\xce\x95...' instead of
    # u'\u0395...'; they will likely render garbled - confirm before changing,
    # as the strings are runtime values.
    feeds = [
        (u'\xce\x95\xce\xbb\xce\xbb\xce\xac\xce\xb4\xce\xb1',
         u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        (u'\xce\x9a\xcf\x8c\xcf\x83\xce\xbc\xce\xbf\xcf\x82',
         u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        (u'\xce\x9f\xce\xb9\xce\xba\xce\xbf\xce\xbd\xce\xbf\xce\xbc\xce\xaf\xce\xb1',
         u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        (u'\xce\xa0\xce\xbf\xce\xbb\xce\xb9\xcf\x84\xce\xb9\xcf\x83\xce\xbc\xcf\x8c\xcf\x82',
         u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        (u'\xce\x93\xce\xbd\xcf\x8e\xce\xbc\xce\xb5\xcf\x82',
         u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        (u'\xce\xa1\xce\xb9\xcf\x80\xce\xad\xcf\x82',
         u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        (u'\xce\x91\xce\xb9\xcf\x87\xce\xbc\xce\xad\xcf\x82',
         u'http://www.tanea.gr/default.asp?pid=81&la=1')]

    def print_version(self, url):
        # Rewrite article URLs (pid=2...) to the printer-friendly page (pid=96...).
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')

View File

@ -1,46 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class tanuki(BasicNewsRecipe):
    '''Tanuki - Polish portal about anime and manga (tanuki.pl).'''
    title = u'Tanuki'
    oldest_article = 7
    __author__ = 'fenuks'
    description = u'Tanuki - portal o anime i mandze.'
    category = 'anime, manga'
    language = 'pl'
    max_articles_per_feed = 100
    encoding = 'utf-8'
    # BUGFIX: the original set a misspelled, inert attribute 'autocleanup';
    # it has been removed.  Do NOT rename it to calibre's real 'auto_cleanup'
    # option: that would conflict with the explicit keep_only_tags below.
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
    # Strip the screenshot header and the "Zobacz jak ocenili" ratings link.
    preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
        type(u'')(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
    remove_empty_feeds = True
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})]  # noqa
    remove_tags = [dict(name='div', attrs={'class': 'screen'}), dict(id='randomtoplist'), dict(attrs={'class': 'note'})]

    feeds = [
        (u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'),
        (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'),
        (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'),
        (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'),
        (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml')]

    def append_page(self, soup, appendtag):
        '''Follow 'nextarrow' pagination links, splicing each following page's
        story content into *appendtag*, then strip the arrow links.'''
        # The redundant outer "if nexturl:" guard was folded into the while.
        nexturl = appendtag.find(attrs={'class': 'nextarrow'})
        while nexturl:
            soup2 = self.index_to_soup(
                'http://czytelnia.tanuki.pl' + nexturl['href'])
            nexturl = soup2.find(attrs={'class': 'nextarrow'})
            pagetext = soup2.find(
                attrs={'class': ['chaptername', 'copycat']})
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
            pagetext = soup2.find(attrs={'class': 'copycat'})
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        for r in appendtag.findAll(attrs={'class': 'nextarrow'}):
            r.extract()

View File

@ -1,73 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Lars Jacob jacob.lars at gmail.com'
__docformat__ = 'restructuredtext de'
'''
www.taz.de/digiabo
'''
import os
import zipfile
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
try:
from urllib.error import HTTPError
from urllib.request import HTTPBasicAuthHandler, build_opener, install_opener, urlopen
except ImportError:
from urllib2 import HTTPBasicAuthHandler, HTTPError, build_opener, install_opener, urlopen
class TazDigiabo(BasicNewsRecipe):
    '''Downloads the ready-made EPUB issue of the taz "DigiAbo"
    (subscription required) and unpacks it as the recipe output.'''
    title = u'Taz Digiabo'
    description = u'Das EPUB DigiAbo der Taz'
    language = 'de'
    lang = 'de-DE'
    __author__ = 'Lars Jacob'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover': True
    }

    def build_index(self):
        # Fetch the current EPUB from dl.taz.de using HTTP basic auth with
        # the calibre-supplied subscription credentials.
        domain = "http://dl.taz.de"
        url = domain + "/epub/"

        auth_handler = HTTPBasicAuthHandler()
        auth_handler.add_password(realm='TAZ-ABO',
                                  uri=url,
                                  user=self.username,
                                  passwd=self.password)
        opener = build_opener(auth_handler)
        # NOTE(review): installs a process-global opener as a side effect.
        install_opener(opener)

        try:
            f = urlopen(url)
        except HTTPError:
            self.report_progress(0, _('Can\'t login to download issue'))
            raise ValueError('Failed to login, check your username and'
                             ' password')

        # Spool the EPUB to a temp file, then extract it into the output dir.
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0, _('downloading epub'))
        tmp.write(f.read())
        tmp.close()

        zfile = zipfile.ZipFile(tmp.name, 'r')
        self.report_progress(0, _('extracting epub'))
        zfile.extractall(self.output_dir)
        tmp.close()  # NOTE(review): second close is a harmless no-op

        # calibre consumes the extracted OPF as the "index" of this recipe.
        index = os.path.join(self.output_dir, 'content.opf')

        self.report_progress(1, _('epub downloaded and extracted'))

        return index

View File

@ -1,15 +0,0 @@
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1327051385(BasicNewsRecipe):
    '''Tech Economy: Italian technology news, built from the site's RSS feed.'''
    # Identity / metadata
    title = u'Tech Economy'
    __author__ = 'faber1971'
    description = 'Italian website on technology - v1.00 (28, January 2012)'
    language = 'it'
    masthead_url = 'http://www.techeconomy.it/wp-content/uploads/2012/01/Logo-TE9.png'

    # Fetch and cleanup behaviour
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags_after = [dict(name='div', attrs={'class': 'cab-author-name'})]

    feeds = [(u'Tech Economy', u'http://www.techeconomy.it/feed/')]

View File

@ -1,46 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '14, January 2010'
'''
http://www.techworld.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class techworld(BasicNewsRecipe):
    '''TechWorld (UK): IT industry news, reviews, tutorials and blogs.'''
    # Identity / metadata
    title = 'TechWorld'
    __author__ = 'Lorenzo Vigentini'
    description = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'  # noqa
    publisher = 'IDG Communication'
    category = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
    language = 'en'
    timefmt = '[%a, %d %b, %Y]'

    # Fetch and cleanup behaviour
    oldest_article = 7
    max_articles_per_feed = 15
    use_embedded_content = False
    recursion = 10
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        (u'News', u'http://www.techworld.com/news/rss'),
        (u'Tutorial', u'http://www.techworld.com/tutorial/rss'),
        (u'Reviews', u'http://www.techworld.com/review/rss'),
        (u'Features', u'http://www.techworld.com/features/rss'),
        (u'Analysis', u'http://www.techworld.com/analysis/rss'),
        (u'Galleries', u'http://www.techworld.com/picture-gallery/rss'),
        (u'TechWorld Blogs', u'http://www.techworld.com/blog/rss'),
    ]

View File

@ -1,31 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TechnologyReview(BasicNewsRecipe):
    '''MIT Technology Review articles collected from the magazine's topic RSS feeds.'''
    title = u'Technology Review'
    __author__ = 'rty'
    description = 'MIT Technology Magazine (from RSS feeds)'
    publisher = 'Technology Review Inc.'
    category = 'Technology, Innovation, R&D'
    language = 'en'
    oldest_article = 14
    max_articles_per_feed = 100
    # BUGFIX: was misspelled 'No_stylesheets', which calibre silently ignores,
    # so site stylesheets were never actually suppressed.
    no_stylesheets = True
    auto_cleanup = True
    extra_css = """
        .ArticleBody {font: normal; text-align: justify}
        .headline {font: bold x-large}
        .subheadline {font: italic large}
    """

    feeds = [
        (u'Computing',
         u'http://feeds.technologyreview.com/technology_review_Computing'),
        (u'Energy',
         u'http://feeds.technologyreview.com/technology_review_Energy'),
        (u'Materials',
         u'http://feeds.technologyreview.com/technology_review_Materials'),
        (u'Biomedicine',
         u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business',
         u'http://feeds.technologyreview.com/technology_review_Biztech')
    ]

View File

@ -1,69 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, Anton Gillert <atx at binaryninja.de>'
'''
Technology Review (deutsch) - heise.de/tr
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TechnologyReviewDe(BasicNewsRecipe):
    '''Technology Review (German edition) from heise.de/tr.'''
    title = 'Technology Review'
    __author__ = 'Anton Gillert, schuster'
    description = 'Technology news from Germany'
    language = 'de'
    oldest_article = 14
    max_articles_per_feed = 50
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    masthead_url = 'http://1.f.ix.de/imgs/02/3/0/8/5/2/8/tr_logo-544bd18881c81263.png'

    feeds = [
        ('News', 'http://www.heise.de/tr/rss/news-atom.xml'),
        ('Blog', 'http://www.heise.de/tr/rss/blog-atom.xml')
    ]

    keep_only_tags = [
        dict(name='article')
    ]

    remove_tags = [
        dict(name='nav'),
        dict(name='figure', attrs={'class': 'logo'}),
        dict(name='hr')
    ]

    extra_css = '.bild_zentriert {font-size: 0.6em} \
.source {font-size: 0.6em}'

    def get_cover_url(self):
        # Scrape the current magazine cover image from the /tr/magazin/ page;
        # returns '' when no cover image is found.
        self.cover_url = ''
        soup = self.index_to_soup('http://www.heise.de/tr/magazin/')
        img = soup.find('img', alt=re.compile(
            'Titelbild Technology Review'), src=True)
        if img:
            self.cover_url = 'http://www.heise.de' + img['src']
        return self.cover_url

    def print_version(self, url):
        # heise serves a print view of the same URL with ?view=print appended.
        return url + '?view=print'

    def preprocess_html(self, soup):
        # remove style attributes
        for item in soup.findAll(attrs={'style': True}):
            del item['style']
        # remove reference to article source
        for p in soup.findAll('p'):
            if 'URL dieses Artikels:' in self.tag_to_string(p):
                p.extract()
        return soup

View File

@ -1,64 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TechTarget(BasicNewsRecipe):
    '''IT-infrastructure blog feeds from Techtarget (subscription login).'''
    title = u'Techtarget'
    __author__ = 'Julio:map'
    description = '''IT Infrastructure related blogs
from Techtarget'''
    publisher = 'Techtarget'
    category = 'IT, Infrastructure'
    oldest_article = 7
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    auto_cleanup = False
    LOGIN = u'http://searchservervirtualization.techtarget.com/login'

    def get_browser(self):
        # Log in via the login page's second form (nr=1) when calibre has
        # been given credentials; password is optional on the form.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=1)
            br['email'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br

    keep_only_tags = [dict(name='div', attrs={'id': 'article'}), dict(
        name='div', attrs={'class': 'entry'})]
    remove_tags = [
        dict(name='div', attrs={'id': ['articleToolbar', 'relatedContent']})]
    remove_tags_after = [dict(name='div', attrs={'id': 'relatedContent'})]

    feeds = [
        (u'IT news and analysis for CIOs',
         u'http://feeds.pheedo.com/SearchCIOITNewsAndAnalysisForCIOs'),
        (u'TotalCIO', u'http://feeds.pheedo.com/1532.xml'),
        (u'SearchCIO-Midmarket: Technology news and tips for midmarket CIOs',
         u'http://feeds.pheedo.com/techtarget/Searchsmb/Smbs'),
        (u'Compliance news and advice for senior IT and business managers',
         u'http://feeds.pheedo.com/tt/1200'),
        (u'Server virtualization news and opinions',
         u'http://feeds.pheedo.com/SearchservervirtualizationServerVirtualizationNewsAndOpinions'),
        (u'The Virtualization Room', u'http://feeds.pheedo.com/techtarget/nzLe'),
        (u'Server virtualization technical tips and expert advice',
         u'http://feeds.pheedo.com/SearchservervirtualizationServerVirtualizationTechnicalTipsAndExpertAdvice'),
        (u'Cloud Computing news and Technical Advice',
         u'http://feeds.pheedo.com/1260'),
        (u'IT infrastructure news',
         u'http://feeds.pheedo.com/techtarget/Searchdatacenter/ItInfrastructure'),
        (u'Storage Channel Update',
         u'http://feeds.pheedo.com/ChannelMarker-TheItChannelWeblog'),
        (u'VMware Tips and News',
         u'http://feeds.pheedo.com/SearchvmwarecomVmwareTipsAndTricks'),
        (u'Enterprise IT news roundup',
         u'http://feeds.pheedo.com/WhatisEnterpriseItNewsRoundup'),
        (u'WhatIs: Enterprise IT tips and expert advice',
         u'http://feeds.pheedo.com/WhatisEnterpriseItTipsAndExpertAdvice'),
        (u'WhatIs: Enterprise IT news roundup',
         u'http://feeds.pheedo.com/WhatisEnterpriseItNewsRoundup'),
    ]

View File

@ -1,30 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
blogs.tedneward.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class InteroperabilityHappens(BasicNewsRecipe):
    '''Ted Neward's "Interoperability Happens" blog, read from its RSS feed;
    article bodies come embedded in the feed itself.'''
    title = 'Interoperability Happens'
    __author__ = 'Darko Miletic'
    description = 'Tech blog by Ted Neward'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True  # full content is in the feed, no page fetch
    publication_type = 'blog'
    extra_css = """
body{font-family: Verdana,Arial,Helvetica,sans-serif}
"""

    conversion_options = {
        'comment': description, 'tags': 'blog, technology, microsoft, programming, C#, Java', 'publisher': 'Ted Neward', 'language': language
    }

    feeds = [(u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss')]

View File

@ -1,57 +0,0 @@
#!/usr/bin/env python
# -*- mode: python -*-
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2012-2016, Darko Miletic <darko.miletic at gmail.com>'
'''
www.telam.com.ar
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Telam(BasicNewsRecipe):
    '''Telam - Argentine state news agency (telam.com.ar) topic feeds.'''
    title = 'Telam'
    __author__ = 'Darko Miletic'
    description = 'AGENCIA DE NOTICIAS DE LA REPUBLICA ARGENTINA'
    publisher = 'Telam S.E.'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'windows-1252'
    use_embedded_content = False
    language = 'es_AR'
    remove_empty_feeds = True
    auto_cleanup = True
    publication_type = 'newsportal'
    PREFIX = 'http://www.telam.com.ar'  # used to absolutize relative article URLs
    masthead_url = 'http://www.telam.com.ar/assets/img/logo.svg'
    extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [
        (u'Ultimas noticias', u'http://www.telam.com.ar/rss2/ultimasnoticias.xml'),
        (u'Politica', u'http://www.telam.com.ar/rss2/politica.xml'),
        (u'Economia', u'http://www.telam.com.ar/rss2/economia.xml'),
        (u'Sociedad', u'http://www.telam.com.ar/rss2/sociedad.xml'),
        (u'Policiales', u'http://www.telam.com.ar/rss2/policiales.xml'),
        (u'Internacionales', u'http://www.telam.com.ar/rss2/internacional.xml'),
        (u'Espectaculos', u'http://www.telam.com.ar/rss2/espectaculos.xml'),
        (u'Cultura', u'http://www.telam.com.ar/rss2/cultura.xml'),
        (u'Deportes', u'http://www.telam.com.ar/rss2/deportes.xml'),
        (u'Educacion', u'http://www.telam.com.ar/rss2/educacion.xml')
    ]

    def get_article_url(self, article):
        # Feed items sometimes carry site-relative URLs; prefix the domain.
        url = BasicNewsRecipe.get_article_url(self, article)
        if url.startswith('/'):
            return self.PREFIX + url
        return url

View File

@ -1,44 +0,0 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisArtikel(BasicNewsRecipe):
    '''Telepolis articles (heise.de/tp), fetched through the print CGI.'''
    title = u'Telepolis (Artikel)'
    __author__ = 'Gerhard Aigner'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    category = 'news'
    description = 'Telepolis Artikel'
    language = 'de_AT'
    oldest_article = 7
    max_articles_per_feed = 100
    recursion = 0
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    remove_tags_before = dict(name='h1')
    remove_tags = [dict(name='img')]

    feeds = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]

    # Strip all anchor tags (keeping their text) before parsing.
    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL | re.IGNORECASE), lambda match: ''),
                          (re.compile(r'</a>', re.DOTALL | re.IGNORECASE), lambda match: ''), ]

    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher]
    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    def print_version(self, url):
        # Article URLs carry a 5-digit article number; the print version is
        # served by a CGI keyed on that number.
        p = re.compile(r'\d{5}', re.DOTALL | re.IGNORECASE)
        m = p.search(url)
        if m is None:
            # BUGFIX: m.group() previously raised AttributeError when the URL
            # contained no 5-digit number; fall back to the original URL.
            return url
        return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr=" + m.group() + "&mode=print"

View File

@ -1,40 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
    '''Thai Post Daily - Thai newspaper, via thaipost.net taxonomy feeds.
    Feed titles below are Thai text written as unicode escapes.'''
    title = u'Thai Post Daily'
    __author__ = 'Chotechai P.'
    language = 'th'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'

    feeds = [
        (u'\u0e02\u0e48\u0e32\u0e27\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e19\u0e36\u0e48\u0e07', u'http://thaipost.net/taxonomy/term/1/all/feed'),
        (u'\u0e1a\u0e17\u0e1a\u0e23\u0e23\u0e13\u0e32\u0e18\u0e34\u0e01\u0e32\u0e23', u'http://thaipost.net/taxonomy/term/11/all/feed'),
        (u'\u0e40\u0e1b\u0e25\u0e27 \u0e2a\u0e35\u0e40\u0e07\u0e34\u0e19', u'http://thaipost.net/taxonomy/term/2/all/feed'),
        (u'\u0e2a\u0e20\u0e32\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19', u'http://thaipost.net/taxonomy/term/3/all/feed'),
        (u'\u0e16\u0e39\u0e01\u0e17\u0e38\u0e01\u0e02\u0e49\u0e2d', u'http://thaipost.net/taxonomy/term/4/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', u'http://thaipost.net/taxonomy/term/5/all/feed'),
        (u'\u0e17\u0e48\u0e32\u0e19\u0e02\u0e38\u0e19\u0e19\u0e49\u0e2d\u0e22', u'http://thaipost.net/taxonomy/term/12/all/feed'),
        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/66/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19\u0e1e\u0e34\u0e40\u0e28\u0e29', u'http://thaipost.net/taxonomy/term/67/all/feed'),
        (u'\u0e1a\u0e31\u0e19\u0e17\u0e36\u0e01\u0e2b\u0e19\u0e49\u0e32 4', u'http://thaipost.net/taxonomy/term/13/all/feed'),
        (u'\u0e40\u0e2a\u0e35\u0e22\u0e1a\u0e0b\u0e36\u0e48\u0e07\u0e2b\u0e19\u0e49\u0e32', u'http://thaipost.net/taxonomy/term/64/all/feed'),
        (u'\u0e04\u0e31\u0e19\u0e1b\u0e32\u0e01\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e25\u0e48\u0e32', u'http://thaipost.net/taxonomy/term/65/all/feed'),
        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08', u'http://thaipost.net/taxonomy/term/6/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e44\u0e23\u0e49\u0e40\u0e07\u0e32', u'http://thaipost.net/taxonomy/term/14/all/feed'),
        (u'\u0e01\u0e23\u0e30\u0e08\u0e01\u0e2b\u0e31\u0e01\u0e21\u0e38\u0e21', u'http://thaipost.net/taxonomy/term/71/all/feed'),
        (u'\u0e04\u0e34\u0e14\u0e40\u0e2b\u0e19\u0e37\u0e2d\u0e01\u0e23\u0e30\u0e41\u0e2a', u'http://thaipost.net/taxonomy/term/69/all/feed'),
        (u'\u0e23\u0e32\u0e22\u0e07\u0e32\u0e19', u'http://thaipost.net/taxonomy/term/68/all/feed'),
        (u'\u0e2d\u0e34\u0e42\u0e04\u0e42\u0e1f\u0e01\u0e31\u0e2a', u'http://thaipost.net/taxonomy/term/10/all/feed'),
        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32-\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', u'http://thaipost.net/taxonomy/term/7/all/feed'),  # noqa
        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', u'http://thaipost.net/taxonomy/term/8/all/feed'),
        (u'\u0e01\u0e35\u0e2c\u0e32', u'http://thaipost.net/taxonomy/term/9/all/feed')]

    def print_version(self, url):
        # Rebuild the whole URL as the print view; url[32:] is the path tail
        # after the site prefix.  NOTE(review): url.replace(url, ...) simply
        # replaces the entire string - presumably intentional; confirm.
        return url.replace(url, 'http://www.thaipost.net/print/' + url[32:])

    # Strip the print-view chrome (logo, site name, breadcrumb).
    remove_tags = []
    remove_tags.append(dict(name='div', attrs={'class': 'print-logo'}))
    remove_tags.append(dict(name='div', attrs={'class': 'print-site_name'}))
    remove_tags.append(dict(name='div', attrs={'class': 'print-breadcrumb'}))

View File

@ -1,46 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheDailyNewsEG(BasicNewsRecipe):
    '''The Daily News Egypt - English-language Egyptian news portal.'''
    title = u'The Daily News Egypt'
    __author__ = 'Omm Mishmishah'
    description = 'News from Egypt'
    masthead_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
    cover_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'

    auto_cleanup = True
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'The Daily News Egypt'
    category = 'News, Egypt, World'
    language = 'en_EG'
    publication_type = 'newsportal'
    # preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    # Remove annoying map links (inline-caption class is also used for some
    # image captions! hence regex to match maps.google)
    preprocess_regexps = [(re.compile(
        r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': False
    }

    keep_only_tags = [dict(attrs={'class': ['article section']})]

    remove_tags = [dict(attrs={'class': ['related', 'tags', 'tools', 'attached-content ready',
                                         'inline-content story left', 'inline-content map left contracted', 'published',
                                         'story-map', 'statepromo', 'topics', ]})]

    remove_attributes = ['width', 'height']

    feeds = [(u'The Daily News Egypt',
              u'http://www.thedailynewsegypt.com/rss.php?sectionid=all')]

View File

@ -1,63 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini and Tom Surace'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>, 2013 Tom Surace <tekhedd@byteheaven.net>'
description = 'The Escapist Magazine - v1.3 (2013, April 2013)'
#
# Based on 'the Escapist Magazine - v1.02 (09, January 2010)'
'''
http://www.escapistmagazine.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class al(BasicNewsRecipe):
    '''The Escapist Magazine -- daily video-game news and articles.'''
    author = 'Lorenzo Vigentini and Tom Surace'
    description = 'The Escapist Magazine'
    title = u'The Escapist Magazine'
    publisher = 'Themis Media'
    category = 'Video games news, lifestyle, gaming culture'
    language = 'en'
    cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10
    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Daily News', u'http://www.escapistmagazine.com/rss/news/0.xml'),
        (u'Articles', u'http://www.escapistmagazine.com/rss/articles/0.xml')
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'})
    ]
    remove_tags = [
        dict(name='div', attrs={
            'id': ['ad_leaderboard', 'print_notice', 'bottom_panel_container']})
    ]

    def print_version(self, url):
        '''Map an article URL to its print view.

        Feed URLs look like:
        http://www.escapistmagazine.com/news/view/123198-article-name?utm_source=rss&utm_medium=rss&utm_campaign=news
        The print view lives at /<section>/print/<number>, where the number
        is the leading digits of the final path segment.
        '''
        segments = url.split('/')
        section = segments[3]
        article_number = segments[-1].split('-')[0]  # digits before the first hyphen
        return 'http://www.escapistmagazine.com' + '/' + section + '/' + 'print/' + article_number

View File

@ -1,12 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1365777047(BasicNewsRecipe):
    '''The Feature -- link feed from thefeature.net.'''
    title = u'The Feature'
    __author__ = 'Jose Pinto'
    language = 'en'

    oldest_article = 30
    max_articles_per_feed = 100
    auto_cleanup = True
    use_embedded_content = False

    feeds = [(u'Latest', u'http://thefeature.net/rss/links')]

View File

@ -1,74 +0,0 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class Freeman(BasicNewsRecipe):
    '''The Freeman -- Cebu daily newspaper from the Philippine STAR group.'''
    title = 'The Freeman'
    # Embed the download timestamp in the generated book's title.
    custom_title = "The Freeman - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Freeman is a daily English-language newspaper published in Cebu, Philippines, by the Philippine Star. It was the first newspaper in Cebu, first published in May 1919. The motto of the newspaper is "The fair and fearless" - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Freeman.jpg'

    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # view comments link
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # comments button
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # zoom
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True
    }

    feeds = [
        ('Cebu News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=107'),
        ('Freeman Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=109'),
        ('Metro Cebu', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=531'),
        ('Region', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=530'),
        ('Cebu Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=108'),
        ('Cebu Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=110'),
        ('Cebu Lifestyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=111'),
        ('Cebu Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=51')
    ]

    def print_version(self, url):
        '''Use the printer-friendly rendering of each article.'''
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        '''Take the title from the print page's header span; this stops
        add_toc_thumbnail from changing the title on articles that carry
        an image.'''
        article.title = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip()

View File

@ -1,80 +0,0 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaBulletin(BasicNewsRecipe):
    '''The Manila Bulletin -- the Philippines' largest broadsheet by circulation.'''
    title = u'The Manila Bulletin'
    # Embed the download timestamp in the generated book's title.
    custom_title = "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = "The Manila Bulletin, (also known as the Bulletin and previously known as the Manila Daily Bulletin and the Bulletin Today) is the Philippines' largest broadsheet newspaper by circulation."  # noqa
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'

    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    keep_only_tags = [
        dict(name='div', attrs={'class': 'article node'}),
        dict(name='div', attrs={'class': 'label'}),
        dict(name='div', attrs={'class': 'content clear-block'})
    ]
    remove_tags = [
        dict(name='li', attrs={'class': 'print_html'}),
        dict(name='li', attrs={'class': 'print_html first'}),
        dict(name='li', attrs={'class': 'print_mail'}),
        dict(name='li', attrs={'class': 'print_mail last'}),
        dict(name='div', attrs={'class': 'article-sidebar'}),
        dict(name='table', attrs={'id': 'attachments'})
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True
    }

    feeds = [
        (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
    ]

    # If switching to the print version, convert the URL like so:
    #   http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    #   -> http://www.mb.com.ph/print/361252
    #
    # def print_version(self, url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
    #     return printURL

View File

@ -1,68 +0,0 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaTimes(BasicNewsRecipe):
    '''The Manila Times -- oldest English-language newspaper in the Philippines.'''
    title = u'The Manila Times'
    # Embed the download timestamp in the generated book's title.
    custom_title = "The Manila Times - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = 'The Manila Times is the oldest existing English language newspaper in the Philippines.'
    language = 'en_PH'
    publisher = 'The Manila Times'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    masthead_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'

    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    remove_tags = [
        dict(name='img', attrs={'alt': 'Print'}),
        dict(name='img', attrs={'alt': 'Email:'}),
        dict(name='dd', attrs={'class': 'hits'})
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True
    }

    feeds = [
        (u'Breaking News', u'http://www.manilatimes.net/index.php/news/breaking-news?format=feed&amp;type=rss'),
        (u'Top Stories', u'http://www.manilatimes.net/index.php/news/top-stories?format=feed&amp;type=rss'),
        (u'Headlines', u'http://www.manilatimes.net/index.php/news/headlines-mt?format=feed&amp;type=rss'),
        (u'Nation', u'http://www.manilatimes.net/index.php/news/nation?format=feed&amp;type=rss'),
        (u'Regions', u'http://www.manilatimes.net/index.php/news/regions?format=feed&amp;type=rss'),
        (u'World', u'http://www.manilatimes.net/index.php/news/world?format=feed&amp;type=rss'),
        (u'Top Business News', u'http://www.manilatimes.net/index.php/business/top-business-news?format=feed&amp;type=rss'),
        (u'Business Columnist', u'http://www.manilatimes.net/index.php/business/business-columnist?format=feed&amp;type=rss'),
        (u'Opinion - Editorials', u'http://www.manilatimes.net/index.php/opinion/editorials?format=feed&amp;type=rss'),
        (u'Opinion - Columnist', u'http://www.manilatimes.net/index.php/opinion/columnist1?format=feed&amp;type=rss'),
        (u'Opinion - Editorial Cartoon', u'http://www.manilatimes.net/index.php/opinion/editorial-cartoon?format=feed&amp;type=rss'),
        (u'Top Sports News', u'http://www.manilatimes.net/index.php/sports/top-sports-news?format=feed&amp;type=rss'),
        (u'Sports Columnist', u'http://www.manilatimes.net/index.php/sports/sports-columnist?format=feed&amp;type=rss'),
        (u'Life & Times', u'http://www.manilatimes.net/index.php/life-and-times?format=feed&amp;type=rss'),
        (u'Showtime', u'http://www.manilatimes.net/index.php/life-and-times/showtime?format=feed&amp;type=rss'),
        (u'Sunday Times', u'http://www.manilatimes.net/index.php/sunday-times?format=feed&amp;type=rss'),
        (u'Sunday Times Magazine', u'http://www.manilatimes.net/index.php/sunday-times/the-sunday-times-magazines?format=feed&amp;type=rss'),
        (u'Motoring News', u'http://www.manilatimes.net/index.php/fast-times/motoring-news?format=feed&amp;type=rss'),
        (u'Motoring Columnist', u'http://www.manilatimes.net/index.php/fast-times/motoring-columnist?format=feed&amp;type=rss'),
        (u'Technology', u'http://www.manilatimes.net/index.php/technology?format=feed&amp;type=rss')]

View File

@ -1,50 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.thenewage.co.za
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheNewAge_za(BasicNewsRecipe):
    '''The New Age -- South African national daily (TNA Media).'''
    title = 'The New Age'
    __author__ = 'Darko Miletic'
    description = "The New Age newspaper is a national daily newspaper, owned and operated by TNA Media (Pty) Ltd. TNA Media was established in June 2010 and the first publication of The New Age was on 6 December 2010. The New Age covers news from all nine provinces, along with national events, Op-Ed columns, politics, Africa and International news, sports, business, entertainment, lifestyle, science and technology."  # noqa
    publisher = 'TNA Media (Pty.) Ltd.'
    category = 'news, politics, South Africa'
    language = 'en_ZA'
    publication_type = 'newspaper'
    encoding = 'utf8'
    masthead_url = 'http://www.thenewage.co.za/image/tnalogo.png'

    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = False
    remove_empty_feeds = True

    extra_css = """
    body{font-family: Arial,Verdana,sans-serif }
    img{display: block}
    .storyheadline{font-size: x-large; font-weight: bold}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'table', 'meta', 'link'])]
    keep_only_tags = [
        dict(name='div', attrs={'id': ['dv_headline', 'dv_story_dtls']})]

    feeds = [
        (u'National', u'http://www.thenewage.co.za/rss.aspx?cat_id=1007'),
        (u'Provinces', u'http://www.thenewage.co.za/rss.aspx?cat_id=1008'),
        (u'Business', u'http://www.thenewage.co.za/rss.aspx?cat_id=9'),
        (u'Sport', u'http://www.thenewage.co.za/rss.aspx?cat_id=10'),
        (u'World', u'http://www.thenewage.co.za/rss.aspx?cat_id=1020'),
        (u'Africa', u'http://www.thenewage.co.za/rss.aspx?cat_id=1019'),
        (u'Science&Tech', u'http://www.thenewage.co.za/rss.aspx?cat_id=1021')
    ]

View File

@ -1,102 +0,0 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineStar(BasicNewsRecipe):
    '''The Philippine Star -- Manila broadsheet from the STAR group.'''
    title = 'The Philippine Star'
    # Embed the download timestamp in the generated book's title.
    custom_title = "The Philippine Star - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Philippine Star is a daily English-language broadsheet newspaper based in Manila. It has the most subscribers of any newspaper in the Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    masthead_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'

    oldest_article = 1  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # view comments link
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # comments button
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # zoom
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True
    }

    feeds = [
        ('Headlines', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=63'),
        ('Breaking News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=200'),
        ('News Feature', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=68'),
        ('Nation', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=67'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=65'),
        ('Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=66'),
        ('Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=69'),
        ('Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=70'),
        ('Science & Technology', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=75'),
        ('Networks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=71'),
        ('Business as Usual', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=78'),
        ('Banking', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=74'),
        ('Motoring', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=72'),
        ('Real Estate', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=76'),
        ('Telecoms', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=73'),
        ('Agriculture', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=77'),
        ('Arts & Culture', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=79'),
        ('Food & Leisure', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=81'),
        ('Health & Family', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=80'),
        ('Education & Home', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=442'),
        ('Travel & Tourism', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=87'),
        ('Newsmakers', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=88'),
        ('Business Life', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=82'),
        ('Fashion & Beauty', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=83'),
        ('For Men', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=446'),
        ('Gadgets', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=449'),
        ('Sunday Life', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=86'),
        ('Supreme', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=448'),
        ('Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=64'),
        ('Letters to the Editor', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=135'),
        ('Starweek Magazine', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=90'),
        ('Modern Living', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=85'),
        ('YStyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=451'),
        ('Allure', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=89'),
        ('Weather', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=116')
    ]

    def print_version(self, url):
        '''Use the printer-friendly rendering of each article.'''
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        '''Take the title from the print page's header span; this stops
        add_toc_thumbnail from changing the title on articles that carry
        an image.'''
        article.title = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip()

View File

@ -1,46 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.scotsman.com/the-scotsman
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheScotsman(BasicNewsRecipe):
    '''The Scotsman -- national news from Scotland.'''
    title = 'The Scotsman'
    __author__ = 'Darko Miletic'
    description = 'News from Scotland'
    publisher = 'Johnston Publishing Ltd.'
    category = 'news, politics, Scotland, UK'
    language = 'en_GB'
    publication_type = 'newspaper'
    encoding = 'utf-8'
    masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'

    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]
    remove_attributes = ['lang']

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    feeds = [
        ('Latest News', 'http://www.scotsman.com/cmlink/1.957140'),
        ('UK', 'http://www.scotsman.com/cmlink/1.957142'),
        ('Scotland', 'http://www.scotsman.com/cmlink/1.957141'),
        ('International', 'http://www.scotsman.com/cmlink/1.957143'),
        ('Politics', 'http://www.scotsman.com/cmlink/1.957044'),
        ('Arts', 'http://www.scotsman.com/cmlink/1.1804825'),
        ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053'),
        ('Sports', 'http://www.scotsman.com/cmlink/1.957151'),
        ('Business', 'http://www.scotsman.com/cmlink/1.957156'),
        ('Features', 'http://www.scotsman.com/cmlink/1.957149'),
        ('Opinion', 'http://www.scotsman.com/cmlink/1.957054')
    ]

View File

@ -1,57 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.getwokingham.co.uk
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheWokinghamTimes(BasicNewsRecipe):
    '''The Wokingham Times (getwokingham.co.uk) -- UK local news.'''
    title = 'The Wokingham Times'
    __author__ = 'Darko Miletic'
    description = 'News from UK'
    publisher = 'The Wokingham Times - S&B media'
    category = 'news, UK, world'
    language = 'en_GB'
    publication_type = 'newsportal'
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    extra_css = """
    body{ font-family: Arial,sans-serif }
    img{display: block; margin-bottom: 0.4em}
    """

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [dict(name='div', attrs={'id': 'article-body'})]
    remove_tags = [
        dict(name='div', attrs={'class': ['ad']}),
        dict(name=['meta', 'base', 'iframe', 'embed', 'object']),
        dict(name='span', attrs={'class': 'caption small'})
    ]
    remove_attributes = ['width', 'height', 'lang']

    feeds = [
        ('Home', 'http://www.getwokingham.co.uk/rss.xml'),
        ('News', 'http://www.getwokingham.co.uk/news/rss.xml'),
        ('Entertainment', 'http://www.getwokingham.co.uk/entertainment/rss.xml'),
        ('Lifestyle', 'http://www.getwokingham.co.uk/lifestyle/rss.xml')
    ]

    def preprocess_html(self, soup):
        '''Strip inline styles and flatten anchors.

        Anchors with plain text are replaced by their text; other anchors
        become <span>s with the href removed.

        Fix: the original bound the anchor's text to a local named ``str``,
        shadowing the builtin -- renamed to ``link_text`` (no behavior change).
        '''
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            if item.string is not None:
                link_text = item.string
                item.replaceWith(link_text)
            else:
                item.name = 'span'
                del item['href']
        return soup

View File

@ -1,183 +0,0 @@
#!/usr/bin/env python
from datetime import date
from lxml import etree
__copyright__ = '2015, April King <april@twoevils.org>'
__license__ = 'GPL v3'
__version__ = '1.2'
'''
http://www.thecodelesscode.com/
'''
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
from calibre.web.feeds.news import BasicNewsRecipe
class CodelessCode(BasicNewsRecipe):
    # Scrapes thecodelesscode.com: an illustrated collection of Zen-koan-style
    # fables about software development. Builds the ToC from the site's
    # contents page, restructures each koan page, then rewrites internal
    # links and metadata in postprocess_book().
    __author__ = 'April King'
    title = u'The Codeless Code'
    category = 'fiction, programming, technology'
    chapters = {}  # ie, Mousetrap -> 182
    compress_news_images = True
    compress_news_images_max_size = 100
    cover_url = 'http://www.thecodelesscode.com/pages/case-9/Lotus-050.jpg'
    # HTML fragments inserted as a credits page in postprocess_book().
    credits = [u'<h2 class="chapter_title">{0}</h2>'.format(title),
               u'<p>By <em>Qi</em></p>',
               u'<p>An illustrated collection of (sometimes violent) fables concerning the Art and Philosophy of software development, written in the spirit of Zen kōans</p>',  # noqa
               u'<p>eBook conversion courtesy of <em>{0}</em></p>'.format(__author__)]
    description = u'The Art and Philosophy of software development, written in the spirit of Zen kōans'
    extra_css = '.article_date { display: none; float: right; } \
.chapter_title { font-size: 1.75em; margin-top: 0; } \
.chapter_title::first-letter { font-size: 1.35em; font-weight: 500; letter-spacing: -.05em; } \
h2 { margin-top: 0; } \
.image_wrapper { text-align: center; }'
    index = 'http://www.thecodelesscode.com/contents'  # the site's ToC page
    language = 'en'
    max_articles_per_feed = 1000  # I can only wish
    path_remappings = {}  # IE, /case/182 -> articles_72/index.html
    publication_type = 'blog'
    publisher = 'Qi'
    resolve_internal_links = True
    scale_news_images = (600, 400)
    simultaneous_downloads = 1
    url = 'http://www.thecodelesscode.com'

    def parse_index(self):
        '''Build the article list from the site's contents page.'''
        koans = []
        # Retrieve the contents page, containing the ToC
        soup = self.index_to_soup(self.index)
        for koan in soup.findAll('tr'):
            # BS has some trouble with the weird layout
            tag = koan.find('a')
            if tag is None:
                continue
            if 'random' in tag['href']:
                continue
            # Minor coding error causes calibre to glitch; use the current date
            # for the most recent title
            koan_date = koan.find('td', attrs={'class': 'toc-date'})
            if koan_date is None:
                koan_date = date.isoformat(date.today())
            else:
                koan_date = koan_date.string
            title = tag.string
            url = self.url + tag['href']
            if u'The Applicant' in title:
                continue  # Only the main story
            koans.append({
                'content': '',
                'date': koan_date,
                'description': '',
                'title': title,
                'url': url,
            })
            # ie, Mousetrap -> 182
            self.chapters[title] = url.split('/')[-1]
        # Oldest koans first
        koans.reverse()
        # Log and then get out of here
        self.log("Found {0} koans".format(len(koans)))
        return([(self.title, koans)])

    def preprocess_html(self, soup):
        '''Rebuild each koan page: keep only the story, prepend the chapter
        title, strip unwanted anchors and wrap images for centering.'''
        title = soup.find('h1', attrs={'class': 'title'}).find(
            'a', attrs={'class': 'subtle'}).string
        # Add a title at the beginning of each chapter
        if title in self.chapters:
            title = '<div class="chapter_title">{0}</div>'.format(title)
        # Load up the actual story
        koan = soup.find('div', attrs={'class': 'story koan'})
        # Kind of a hack-y way to get .children in BS3 <a><b><c></c></b></a>
        # -> <b><c></c></b>
        contents = list(koan.contents)
        koan = bs(title)
        for i in reversed(contents):
            koan.insert(1, i)
        # Remove all anchors that don't contain /case/, leaving them as just their text
        # Note that we'll come back and clean up /case/ links when the URLs are remapped
        # during postprocess_book()
        anchors = koan.findAll('a')
        if anchors != []:
            for anchor in anchors:
                if '/case/' in anchor['href']:
                    pass
                elif 'note' in anchor['href']:
                    anchor.replaceWith('')
                else:
                    # Again, a hacky way to get the contents of the tag, thanks
                    # to BS3
                    contents = list(anchor.contents)
                    linktext = bs()
                    for i in reversed(contents):
                        linktext.insert(1, i)
                    anchor.replaceWith(linktext)
        # Find all the images, and wrap them up in an image_wrapper div
        for i in range(0, len(koan.contents), 1):
            if not hasattr(koan.contents[i], 'name'):
                continue  # skip carriage returns
            if koan.contents[i].name == u'img':
                div = bs('<div class="image_wrapper"></div>')
                div.div.insert(0, koan.contents[i])
                koan.insert(i, div)
        return(koan)

    def canonicalize_internal_url(self, url, is_link=True):
        # Reduce absolute site URLs to their path so internal links resolve.
        url = url.split(self.url)[-1]
        return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link)

    def postprocess_book(self, oeb, opts, log):
        '''Fix remapped hrefs, replace the auto-generated feed index page
        with the credits, and correct the book metadata.'''
        # Go through each internal representation of each HTML file, and fix
        # all the broken hrefs, if possible
        for item in oeb.manifest.items:
            if item.media_type == 'text/html':
                for node in item.data.xpath('//*[@href]'):
                    naughty_href = node.get('href')
                    if naughty_href in self.path_remappings:
                        node.set('href', '../' +
                                 self.path_remappings[naughty_href])
                        href = node.get('href')
                        self.log(
                            "Remapped href {0} --> {1}".format(naughty_href, href))
        # Remove the superfluous extra feed page at the beginning of the book, replacing it
        # with the proper credits
        for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="ul"]'):
            item.getparent().remove(item)
        for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="p"]'):
            item.getparent().remove(item)
        for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="div"]'):
            for credit in self.credits[::-1]:
                item.insert(0, etree.fromstring(credit, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)))
        # Change the creator from "calibre" to the actual author
        # Also, we don't need the date in the ebook's title
        oeb.metadata.items['creator'][0].value = self.publisher
        oeb.metadata.items['description'][0].value = oeb.metadata.items[
            'description'][0].value.split('\n\nArticles in this issue')[0]
        oeb.metadata.items['publication_type'][0].value = self.title
        oeb.metadata.items['publisher'][0].value = self.publisher
        oeb.metadata.items['title'][0].value = self.title

View File

@ -1,52 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.livemint.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Edgesingapore(BasicNewsRecipe):
    '''The Edge Singapore -- financial news.'''
    title = 'The Edge Singapore'
    __author__ = 'Darko Miletic'
    description = 'Financial news from Singapore'
    publisher = 'The Edge Singapore'
    category = 'news, finances, singapore'
    language = 'en'
    lang = 'en_SG'
    encoding = 'utf-8'

    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    extra_css = ' .contentheading{font-size: x-large} .small{font-size: small} .createdate{font-size: small; font-weight: bold} '

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'author': publisher, 'language': lang, 'pretty_print': True, 'linearize_tables': True
    }

    remove_tags = [
        dict(name=['object', 'link', 'embed', 'form', 'iframe']),
        dict(name='div', attrs={'id': 'toolbar-article'}),
        dict(name='div', attrs={'class': 'backtotop'}),
        dict(name='img', attrs={'alt': 'Print'})
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'backtotop'})

    feeds = [(u'Articles', u'http://feeds.feedburner.com/edgesg')]

    def print_version(self, url):
        '''Append the component/print query to get the print layout.'''
        return url + '?tmpl=component&print=1'

    def preprocess_html(self, soup):
        '''Flatten table markup into divs and strip layout attributes.'''
        attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border'  # noqa
                   ]
        for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
            item.name = 'div'
            for attrib in attribs:
                # NOTE(review): assign-then-delete -- presumably so del never
                # hits a missing attribute in BS3; kept verbatim.
                item[attrib] = ''
                del item[attrib]
        return self.adeify_images(soup)

View File

@ -1,34 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
luminous-landscape.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class theluminouslandscape(BasicNewsRecipe):
    '''The Luminous Landscape -- multi-author photography weblog.'''
    title = 'The Luminous Landscape'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'  # noqa
    publisher = 'The Luminous Landscape '
    category = 'news, blog, photograph, international'
    language = 'en'
    encoding = 'cp1252'

    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [dict(name=['object', 'link', 'iframe'])]

    feeds = [
        (u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]

    def preprocess_html(self, soup):
        # Delegate image cleanup to calibre's adeify_images helper.
        return self.adeify_images(soup)

View File

@ -1,24 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
market-ticker.denninger.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Themarketticker(BasicNewsRecipe):
    '''The Market Ticker -- capital-markets commentary blog.'''
    title = 'The Market Ticker'
    __author__ = 'Darko Miletic'
    description = 'Commentary On The Capital Markets'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    # Options forwarded to the html2lrf converter.
    html2lrf_options = [
        '--comment', description,
        '--category', 'blog,news,finances',
        '--base-font-size', '10'
    ]

    feeds = [(u'Posts', u'http://market-ticker.denninger.net/feeds/index.rss2')]

View File

@ -1,42 +0,0 @@
#!/usr/bin/env python
# -*- mode: python -*-
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2017, Darko Miletic <darko.miletic at gmail.com>'
'''
blog.acolyer.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Themorningpaper(BasicNewsRecipe):
    '''The Morning Paper (blog.acolyer.org) -- CS paper summaries.'''
    title = 'The Morning Paper'
    __author__ = 'Darko Miletic'
    description = ('an interesting/influential/important paper from'
                   ' the world of CS every weekday morning, as selected by Adrian Colyer')
    publisher = 'Adrian Colyer'
    category = 'news, tech'
    language = 'en'
    publication_type = 'blog'
    encoding = 'utf-8'

    oldest_article = 180
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = True

    extra_css = """
    body{font-family: Georgia,Palatino,serif }
    img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language
    }

    feeds = [(u'Articles', u'https://blog.acolyer.org/feed/')]

View File

@ -1,88 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheNewsRecipe(BasicNewsRecipe):
    """'The News' — English-language newspaper from Pakistan (Jang Group)."""
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1
    title = u'The News'
    publisher = u'Jang Group'
    category = u'News, Pakistan'
    description = u'English Newspaper from Pakistan'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'

    # Site chrome: masthead logo and layout spacer images.
    # (was: an empty list mutated with repeated .append() calls)
    remove_tags = [
        dict(name='img', attrs={'src': 'images/thenews.gif'}),
        dict(name='img', attrs={'src': 'images/shim.gif'}),
    ]

    # Feeds from http://thenews.com.pk/rss.asp
    # (was: an empty list mutated with thirteen .append() calls)
    feeds = [
        (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'),
        (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'),
        (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'),
        (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'),
        (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'),
        (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'),
        (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'),
        (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'),
        (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'),
        (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'),
        (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'),
        (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'),
        (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'),
    ]

    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher, 'linearize_tables': True}

    extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
.heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
.small_txt {text-align: left;}
.dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
'''

    # Map article pages to their printer-friendly equivalents
    # (replaces the original if/elif chain with a dispatch table).
    _PRINT_PAGES = {
        'updates.asp': 'print.asp',
        'top_story_detail.asp': 'print3.asp',
        'daily_detail.asp': 'print1.asp',
    }

    def print_version(self, url):
        """Return the print-friendly URL for *url*, or None if unrecognised."""
        ignore, sep, main = url.rpartition('/')
        for page, print_page in self._PRINT_PAGES.items():
            if main.startswith(page):
                return url.replace(page, print_page)
        return None

    def preprocess_html(self, soup):
        """Drop presentational attributes so extra_css controls styling."""
        for tr in soup.findAll('tr', attrs={'bgcolor': True}):
            del tr['bgcolor']
        # Restyle the fixed-height dateline cell via the .dateline CSS class.
        td = soup.find('td', attrs={'class': 'small_txt', 'height': '20'})
        if td:
            del td['height']
            td['class'] = 'dateline'
        return soup

View File

@ -1,29 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.theoldfoodie.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheOldFoodie(BasicNewsRecipe):
    # 'The Old Foodie' food-history blog; full posts are embedded in the feed.
    title = 'The Old Foodie'
    __author__ = 'Darko Miletic'
    description = 'Food blog'
    category = 'cuisine, food, blog'
    language = 'en'
    encoding = 'utf-8'
    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = True
    no_stylesheets = True

    # Carry the recipe metadata into the converted book.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
    }

    feeds = [
        (u'Articles', u'http://www.theoldfoodie.com/feeds/posts/default?alt=rss'),
    ]

View File

@ -1,89 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
'''
theonion.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheOnion(BasicNewsRecipe):
    """The Onion satirical news portal, with optional subscriber login."""
    title = 'The Onion'
    __author__ = 'Darko Miletic'
    description = "The Onion, America's Finest News Source, is an award-winning publication covering world, national, and * local issues. It is updated daily online and distributed weekly in select American cities."  # noqa
    oldest_article = 2
    max_articles_per_feed = 100
    publisher = 'Onion, Inc.'
    category = 'humor, news, USA'
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    publication_type = 'newsportal'
    needs_subscription = 'optional'
    masthead_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
    cover_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
    extra_css = """
body{font-family: Helvetica,Arial,sans-serif}
.section_title{color: gray; text-transform: uppercase}
.title{font-family: Georgia,serif}
.meta{color: gray; display: inline}
.has_caption{display: block}
.caption{font-size: x-small; color: gray; margin-bottom: 0.8em}
"""
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    keep_only_tags = [
        dict(attrs={'class': lambda x: x and 'content-wrapper' in x.split()})]
    remove_attributes = ['lang', 'rel']
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'base', 'meta', 'button', 'footer', 'blockquote', 'figcaption']), dict(attrs={'class': lambda x: x and 'share-tools' in x.split()}), dict(attrs={'class': lambda x: x and 'content-meta' in x.split()}), dict(attrs={'class': 'below-article-tools'}), dict(name='div', attrs={'id': ['topshare', 'bottomshare']})  # noqa
    ]
    feeds = [
        (u'Daily', u'http://feeds.theonion.com/theonion/daily'),
        (u'Sports', u'http://feeds.theonion.com/theonion/sports'),
    ]

    def get_browser(self):
        """Return a browser, logged in when subscription credentials are set."""
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.theonion.com/')
        if self.username is not None and self.password is not None:
            br.open('https://ui.ppjol.com/login/onion/u/j_spring_security_check')
            br.select_form(name='f')
            br['j_username'] = self.username
            br['j_password'] = self.password
            br.submit()
        return br

    def get_article_url(self, article):
        """Return the article URL, or None to skip audio-only items."""
        artl = BasicNewsRecipe.get_article_url(self, article)
        # FIX: the base implementation can return None; guard before
        # calling .startswith() (previously raised AttributeError).
        if artl and artl.startswith('http://www.theonion.com/audio/'):
            artl = None
        return artl

    def preprocess_html(self, soup):
        """Flatten anchors and inline styles; promote lazy-loaded images."""
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                # FIX: local was named ``str``, shadowing the builtin.
                txt = item.string
                item.replaceWith(txt)
            elif limg:
                # Keep image-only links as plain containers.
                item.name = 'div'
                item.attrs = []
                if not limg.get('alt'):
                    limg['alt'] = 'image'
            else:
                txt = self.tag_to_string(item)
                item.replaceWith(txt)
        # Lazy-loaded images carry the real URL in data-src.
        for item in soup.findAll('img'):
            if item.get('data-src'):
                item['src'] = item['data-src']
        return soup

View File

@ -1,62 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
thewest.com.au
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TheWest(BasicNewsRecipe):
    # Daily news from The West Australian (thewest.com.au).
    title = 'The West Australian'
    __author__ = 'Darko Miletic'
    description = 'News from Australia'
    publisher = 'thewest.com.au'
    category = 'news, politics, Australia'
    language = 'en_AU'
    publication_type = 'newspaper'
    encoding = 'utf8'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    masthead_url = 'http://l.yimg.com/ao/i/mp/properties/news/02/wan/img/wan-logo-h49.png'
    extra_css = ' .article{font-family: Arial,Helvetica,sans-serif } .image{font-size: x-small} '

    # Drop everything between </title> and </head> before parsing.
    preprocess_regexps = [
        (re.compile(r'</title>.*?</head>', re.DOTALL | re.IGNORECASE),
         lambda match: '</title></head>'),
    ]

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(attrs={'class': 'mod article'})]
    remove_tags = [
        dict(attrs={'class': ['tools', 'lhs']}),
        dict(attrs={'id': 'tools-bottom'}),
        dict(attrs={'href': 'http://twitter.com/thewest_com_au'}),
    ]
    remove_attributes = ['width', 'height']

    feeds = [
        (u'WA News', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/wa.xml'),
        (u'National', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/national.xml'),
        (u'World', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/world.xml'),
        (u'Offbeat', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/offbeat.xml'),
        (u'Business', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/business.xml'),
        (u'Sport', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/sport.xml'),
        (u'Entertainment', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/entertainment.xml'),
        (u'Travel', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/travel.xml'),
        (u'Life+Style', u'http://d.yimg.com/au.rss.news.yahoo.com/thewest/lifestyle.xml'),
    ]

    def get_article_url(self, article):
        """The feed's guid field carries the article URL."""
        return article.get('guid', None)

    def preprocess_html(self, soup):
        """Apply the standard ADE image fix-up."""
        return self.adeify_images(soup)

View File

@ -1,13 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1263409732(BasicNewsRecipe):
    # Think Progress: progressive news and commentary, fetched from one feed.
    title = u'Think Progress'
    description = u'A compilation of progressive articles on social and economic justice, healthy communities, media accountability, global and domestic security.'  # noqa
    __author__ = u'Xanthan Gum'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
        (u'News Articles', u'http://thinkprogress.org/feed/'),
    ]

Some files were not shown because too many files have changed in this diff Show More