mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Merge branch 'master' of https://github.com/CoderAllan/calibre
This commit is contained in:
commit
d664b749dc
@ -1,36 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.accountancyage.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AccountancyAge(BasicNewsRecipe):
    """Recipe for www.accountancyage.com, built from its single "All News" feed."""

    # --- Publication metadata ---
    title = 'Accountancy Age'
    __author__ = 'Darko Miletic'
    description = 'business news'
    publisher = 'accountancyage.com'
    category = 'news, politics, finances'
    language = 'en'
    lang = 'en'
    encoding = 'utf-8'

    # --- Fetch settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    simultaneous_downloads = 1
    use_embedded_content = False
    no_stylesheets = True

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all'),
    ]

    # Tag specs: every <h1>, plus any element with class "article_content".
    keep_only_tags = [
        {'name': 'h1'},
        {'attrs': {'class': 'article_content'}},
    ]

    def get_article_url(self, article):
        """Return the feed entry's ``guid`` value, or ``None`` when it has none."""
        return article.get('guid')
|
@ -1,27 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Oskar Kunicki <rakso at interia.pl>'
|
||||
'''
|
||||
Changelog:
|
||||
2011-11-27
|
||||
News from BluesRSS.info
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class BluesRSS(BasicNewsRecipe):
    """Recipe for the BluesRSS.info news feed."""

    # --- Publication metadata ---
    title = 'Blues News'
    __author__ = 'Oskar Kunicki'
    description = 'Blues news from around the world'
    publisher = 'BluesRSS.info'
    category = 'news, blues, USA,UK'
    language = 'en'

    # The same image is used for both the cover and the masthead.
    cover_url = 'http://bluesrss.info/cover.jpg'
    masthead_url = cover_url

    # --- Fetch settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True

    # Tag spec for the "wp-pagenavi" <div>.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'wp-pagenavi'}},
    ]

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'News', u'http://bluesrss.info/feed/'),
    ]
|
@ -1,61 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'DrMerry Based on v1.01 by Lorenzo Vigentini'
|
||||
__copyright__ = 'For version 1.02, 1.03: DrMerry'
|
||||
__version__ = 'v1.03'
|
||||
__date__ = '11, July 2011'
|
||||
|
||||
'''
|
||||
http://www.computeractive.co.uk/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class computeractive(BasicNewsRecipe):
    """Recipe for Computeractive (http://www.computeractive.co.uk/), built from two RSS feeds."""

    # --- Publication metadata ---
    title = 'Computer act!ve'
    __author__ = 'DrMerry'
    description = (
        'Computeractive publishes new downloads, reviews, news stories, '
        'step-by-step guides and answers to PC problems every day.'
    )
    publisher = 'Incisive media'
    category = 'PC, video, computing, product reviews, editing, cameras, production'
    language = 'en'
    cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Fetch settings ---
    oldest_article = 7
    max_articles_per_feed = 25
    use_embedded_content = False
    # NOTE(review): other recipes spell this attribute ``recursions``; as
    # written this name is probably ignored by the base class -- confirm
    # before renaming, since a real recursion depth of 10 would change what
    # gets downloaded.
    recursion = 10

    # --- Clean-up settings ---
    remove_javascript = True
    no_stylesheets = True
    remove_empty_feeds = True
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article_tags_block'}}

    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'container_left'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'id': [
            'seeAlsoTags', 'commentsModule', 'relatedArticles', 'mainLeft',
            'mainRight', 'recent_comment_block_parent', 'reviewDetails',
        ]}},
        # NOTE(review): 'section_title.title_right_button_fix' looks like a
        # CSS selector rather than a class name -- verify it matches anything.
        {'name': 'div', 'attrs': {'class': [
            'buyIt', 'detailMpu', 'small_section', 'recent_comment_block_parent',
            'title_right_button_fix', 'section_title.title_right_button_fix',
            'common_button',
        ]}},
        {'name': 'a', 'attrs': {'class': 'largerImage'}},
    ]

    # Replace every opening and closing anchor tag with the empty string
    # (the link text itself is not part of the match).
    preprocess_regexps = [
        (
            re.compile(r'(<a [^>]*>|</a>)', re.DOTALL | re.IGNORECASE),
            lambda anchor: '',
        ),
    ]

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'),
        (u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'),
    ]
|
||||
|
||||
|
@ -1,61 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Starson17'
|
||||
'''
|
||||
www.epicurious.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Epicurious(BasicNewsRecipe):
    """Recipe for www.epicurious.com: recipe, feature and blog feeds."""

    # --- Publication metadata ---
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'

    # --- Fetch settings ---
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    keep_only_tags = [
        dict(name='div', attrs={'class': [
            'mainconsolewrapper', 'videoheader', 'content_unit',
            'entry-content', 'see_more_block']}),
        dict(name='div', attrs={'id': [
            'headline', 'introBlock', 'ingredients', 'preparation',
            'articleContent', 'in_categories_block']}),
    ]

    remove_tags = [
        {'id': ['printShoppingList', 'addnoteLnk', 'btnUploadVideo', 'enlarge_image']},
        {'class': ['subLnk', 'sbmWrapper', 'detail_division',
                   'entry-footer', 'comment-footer']},
        dict(name='div', attrs={'class': ['tagged', 'comments']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'entry-content'})]

    # Feeds as (title, URL) pairs; the trailing spaces in the titles are
    # part of the original recipe and are kept as-is.
    feeds = [
        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog'),
    ]

    match_regexps = [
        r'http://www.epicurious.com/.*recipes/.*/views',
    ]

    preprocess_regexps = [
        # Join a "/" that is immediately followed by a line break.
        (re.compile(r'/\n', re.DOTALL | re.IGNORECASE), lambda match: '/'),
        # Drop the "_116" suffix from image file names.  The dot is escaped
        # so that only a literal ".jpg" extension matches (previously the
        # unescaped "." matched any character before "jpg").
        (re.compile(r'_116\.jpg', re.DOTALL | re.IGNORECASE), lambda match: '.jpg'),
        # Cut everything from the comments <div> up to the end of the body.
        (re.compile(r'<div class="comments".*</body>',
                    re.DOTALL | re.IGNORECASE), lambda match: '</body>'),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename every table, tr and td element in ``soup`` to div.

        ``first_fetch`` is accepted but not used.  Returns the same ``soup``
        object, modified in place.
        """
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'
        return soup
|
Binary file not shown.
Before Width: | Height: | Size: 607 B |
Binary file not shown.
Before Width: | Height: | Size: 270 B |
Binary file not shown.
Before Width: | Height: | Size: 182 B |
Binary file not shown.
Before Width: | Height: | Size: 820 B |
@ -1,43 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
juventudrebelde.co.cu
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Juventudrebelde_english(BasicNewsRecipe):
    """Recipe for the English edition of Juventud Rebelde."""

    # --- Publication metadata ---
    title = 'Juventud Rebelde in english'
    __author__ = 'Darko Miletic'
    description = 'The newspaper of Cuban Youth'
    publisher = 'Juventud Rebelde'
    category = 'news, politics, Cuba'
    language = 'en'

    # --- Fetch settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'iso-8859-1'
    remove_javascript = True

    # Option containers built from the metadata above.
    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables'
    ]

    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='div', attrs={'class': 'read'})]

    # Feeds as (title, URL) pairs.
    feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/')]

    def preprocess_html(self, soup):
        """Add a Content-Language meta tag and strip inline ``style`` attributes.

        Returns the same ``soup`` object, modified in place.
        """
        # NOTE(review): the tag declares "es-CU" although this recipe sets
        # language = 'en' -- confirm which value is intended.
        mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
        head = soup.head
        # Pages without a <head> element used to raise here; skip the
        # insertion instead so the rest of the clean-up still runs.
        if head is not None:
            head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
@ -1,95 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''
|
||||
Fetch Linuxdevices.
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class LinuxDevices(BasicNewsRecipe):
    """Recipe that fetches Linuxdevices from the linuxfordevices.com RSS feed."""

    # --- Publication metadata ---
    title = u'Linuxdevices'
    __author__ = 'Oliver Niesner'
    description = 'News about Linux driven Hardware'
    language = 'en'
    timefmt = ' [%a %d %b %Y]'

    # --- Fetch settings ---
    use_embedded_content = False
    max_articles_per_feed = 50
    encoding = 'latin1'
    no_stylesheets = True
    remove_javascript = True
    conversion_options = {'linearize_tables': True}

    # --- Clean-up settings ---
    remove_tags_after = [{'id': 'intelliTxt'}]
    filter_regexps = [r'ad\.doubleclick\.net']

    # NOTE(review): the entries {'td': 'height="3"'} and
    # {'class': 'width="200"'} look like mis-typed attribute specs; they are
    # kept verbatim -- verify whether they ever match.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'bannerSuperBanner'}},
        {'name': 'div', 'attrs': {'class': 'bannerSky'}},
        {'name': 'div', 'attrs': {'border': '0'}},
        {'name': 'div', 'attrs': {'class': 'footerLinks'}},
        {'name': 'div', 'attrs': {'class': 'seitenanfang'}},
        {'name': 'td', 'attrs': {'class': 'mar5'}},
        {'name': 'table', 'attrs': {'class': 'pageAktiv'}},
        {'name': 'table', 'attrs': {'class': 'xartable'}},
        {'name': 'table', 'attrs': {'class': 'wpnavi'}},
        {'name': 'table', 'attrs': {'class': 'bgcontent absatz'}},
        {'name': 'table', 'attrs': {'class': 'footer'}},
        {'name': 'table', 'attrs': {'class': 'artikelBox'}},
        {'name': 'table', 'attrs': {'class': 'kommentare'}},
        {'name': 'table', 'attrs': {'class': 'pageBoxBot'}},
        {'name': 'table', 'attrs': {'td': 'height="3"'}},
        {'name': 'table', 'attrs': {'class': 'contentpaneopen'}},
        {'name': 'td', 'attrs': {'nowrap': 'nowrap'}},
        {'name': 'td', 'attrs': {'align': 'left'}},
        {'name': 'td', 'attrs': {'height': '5'}},
        {'name': 'td', 'attrs': {'class': 'ArticleWidgetsHeadline'}},
        {'name': 'div', 'attrs': {'class': 'artikelBox navigatorBox'}},
        {'name': 'div', 'attrs': {'class': 'similar-article-box'}},
        {'name': 'div', 'attrs': {'class': 'videoBigHack'}},
        {'name': 'td', 'attrs': {'class': 'artikelDruckenRight'}},
        {'name': 'td', 'attrs': {'class': 'width="200"'}},
        {'name': 'span', 'attrs': {'class': 'content_rating'}},
        {'name': 'a', 'attrs': {'href': 'http://www.addthis.com/bookmark.php'}},
        {'name': 'a', 'attrs': {'href': '/news'}},
        {'name': 'a', 'attrs': {'href': '/cgi-bin/survey/survey.cgi'}},
        {'name': 'a', 'attrs': {'href': '/cgi-bin/board/UltraBoard.pl'}},
        {'name': 'iframe'},
        {'name': 'form'},
        {'name': 'span', 'attrs': {'class': 'hidePrint'}},
        {'id': 'ArticleWidgets'},
        {'id': 'headerLBox'},
        {'id': 'nointelliTXT'},
        {'id': 'rechteSpalte'},
        {'id': 'newsticker-list-small'},
        {'id': 'ntop5'},
        {'id': 'ntop5send'},
        {'id': 'ntop5commented'},
        {'id': 'nnav-bgheader'},
        {'id': 'nnav-headerteaser'},
        {'id': 'nnav-head'},
        {'id': 'nnav-top'},
        {'id': 'readcomment'},
    ]

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'Linuxdevices', u'http://www.linuxfordevices.com/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Strip "Related" markers, list elements and leading line breaks.

        The three passes run in this order on the same tree, so each pass
        only sees what the previous one left behind.  Returns ``soup``.
        """
        related = re.compile(r"^Related")
        for node in soup.findAll('b', text=related):
            node.extract()

        # Any tag whose name starts with "ul".
        list_tag = re.compile('^ul')
        for node in soup.findAll(list_tag):
            node.extract()

        # Only the first ten <br> elements found are removed.
        for node in soup.findAll('br', limit=10):
            node.extract()

        return soup

    def postprocess_html(self, soup, first):
        """Rename every table, tr and td element to div; ``first`` is unused."""
        for cell in soup.findAll(name=['table', 'tr', 'td']):
            cell.name = 'div'
        return soup
|
@ -1,72 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '14, January 2010'
|
||||
|
||||
'''
|
||||
http://www.macvideo.tv/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
# NOTE(review): these two names are defined at module level, not on the
# recipe class, while ``get_obfuscated_article`` below reads
# ``self.temp_files``.  Presumably they were meant to be class attributes --
# confirm before moving them, because doing so would start routing every
# article through ``get_obfuscated_article``.
temp_files = []
articles_are_obfuscated = True


class macVideo(BasicNewsRecipe):
    """Recipe for MacVideo (http://www.macvideo.tv/): news, reviews, features and blogs."""

    # --- Publication metadata ---
    title = 'MacVideo '
    __author__ = 'Lorenzo Vigentini'
    description = (
        'MacVideo is an independent journal not affiliated with Apple Computer, '
        'It is a publication of IDG Communication focusing on video '
        'production and editing.'
    )
    publisher = 'IDG Communication'
    category = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
    cover_url = 'http://www.macvideo.tv/images/shared/macvideo-logo.jpg'
    language = 'en'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    # --- Fetch settings ---
    oldest_article = 30
    max_articles_per_feed = 25
    use_embedded_content = False
    # NOTE(review): other recipes spell this attribute ``recursions``; this
    # name is probably ignored by the base class -- confirm before renaming.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    def get_obfuscated_article(self, url):
        """Download the article behind ``url`` into a temporary HTML file.

        Opens ``url + '&print'``, follows a link from that page, writes the
        response body to a ``PersistentTemporaryFile`` and returns the
        file's name.  The file object is also appended to
        ``self.temp_files``.
        """
        br = self.get_browser()
        br.open(url + '&print')

        # NOTE(review): ``url`` is passed positionally together with
        # ``nr=0`` -- check this against the browser's follow_link signature.
        page = br.follow_link(url, nr=0)
        markup = page.read()

        holder = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(holder)
        holder.write(markup)
        holder.close()
        return holder.name

    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'mainContent'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['submissionBar', 'mpuContainer']}},
        {'name': 'p', 'attrs': {'class': 'articlePag'}},
        {'name': 'ul', 'attrs': {'id': 'articleIconsList'}},
    ]

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'News', u'http://www.macvideo.tv/rss/feeds/macvideo-news.xml'),
        (u'Reviews', u'http://www.macvideo.tv/rss/feeds/macvideo-reviews.xml'),
        (u'Interviews', u'http://www.macvideo.tv/rss/feeds/macvideo-features-interviews.xml'),
        (u'Features', u'http://www.macvideo.tv/rss/feeds/macvideo-features-features.xml'),
        (u'Rick Young', u'http://www.macvideo.tv/rss/feeds/blog100140.xml'),
        (u'Matt Davis', u'http://www.macvideo.tv/rss/feeds/blog101658.xml'),
        (u'Adrian Miskelly', u'http://www.macvideo.tv/rss/feeds/blog101750.xml'),
    ]
|
||||
|
@ -1,47 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.moneynews.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class MoneyNews(BasicNewsRecipe):
    """Recipe for www.moneynews.com, using the site's print template for articles."""

    # --- Publication metadata ---
    title = 'Moneynews.com'
    __author__ = 'Darko Miletic'
    description = 'Financial news worldwide'
    publisher = 'Newsmax.com'
    language = 'en'
    category = 'news, finances, USA, business'

    # --- Fetch settings ---
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'

    # The description is passed under the key 'comments', matching the other
    # recipes in this collection (this one previously used 'comment').
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'Street Talk', u'http://www.moneynews.com/rss/StreetTalk/8.xml'),
        (u'Finance News', u'http://www.moneynews.com/rss/FinanceNews/4.xml'),
        (u'Economy', u'http://www.moneynews.com/rss/Economy/2.xml'),
        (u'Companies', u'http://www.moneynews.com/rss/Companies/6.xml'),
        (u'Markets', u'http://www.moneynews.com/rss/Markets/7.xml'),
        (u'Investing & Analysis', u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml'),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': 'copy'})]

    remove_tags = [
        dict(attrs={'class': ['MsoNormal', 'MsoNoSpacing']}),
        dict(name=['object', 'link', 'embed', 'form', 'meta']),
    ]

    def print_version(self, url):
        """Build the print-template URL for an article.

        The text after the last "/" in ``url`` is used as the ``nodeid``
        query value.
        """
        nodeid = url.rpartition('/')[2]
        return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid
|
@ -1,23 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class OpenLeft(BasicNewsRecipe):
    """Recipe for Open Left, built from the site's RSS 2 feed."""

    # --- Information about the recipe ---
    title = 'Open Left'
    __author__ = 'Xanthan Gum'
    description = 'Progressive American commentary on current events'
    category = 'news, commentary'
    language = 'en'

    # --- Fetch limits ---
    oldest_article = 7            # nothing older than seven days
    max_articles_per_feed = 100   # at most 100 articles

    # The RSS feed the articles come from, as a (title, URL) pair.
    feeds = [
        (u'Articles', u'http://www.openleft.com/rss/rss2.xml'),
    ]
|
@ -1,114 +0,0 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class RichmondTimesDispatch(BasicNewsRecipe):
    """Recipe for the Richmond Times-Dispatch (www2.timesdispatch.com)."""

    # --- Publication metadata ---
    title = u'Richmond Times-Dispatch'
    # Built with implicit string concatenation so that no source-code
    # indentation ends up inside the text (backslash continuations inside a
    # string literal keep the next line's leading whitespace).
    description = (
        "The Richmond Times-Dispatch is the primary daily newspaper in Richmond, "
        "the capital of Virginia, United States, as well as the Virginia cities of Petersburg, "
        "Chester, Hopewell, Colonial Heights, Charlottesville, Lynchburg, Waynesboro, "
        "and is also a default paper for rural regions of the state. "
        "The RTD has published in some form for more than 150 years."
    )
    __author__ = '_reader'
    __date__ = '17 October 2012'
    __version__ = '1.6'
    cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
    masthead_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
    language = 'en'
    publisher = 'timesdispatch.com'
    category = 'news, commentary'
    tags = 'news'
    publication_type = 'newspaper'

    # --- Fetch settings ---
    oldest_article = 1.5  # days
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    needs_subscription = False
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    simultaneous_downloads = 20
    recursions = 0
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    conversion_options = {
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True
    }

    # NOTE(review): 'hnews hentry item' appears as part of a *class* value
    # in the first preprocess regex below; as an id it may never match --
    # verify.
    remove_tags_before = dict(id='hnews hentry item')

    remove_tags_after = dict(name='hr')

    remove_tags = [
        dict(name='div', attrs={'id': [
            'mg_hd', 'mg_ft', 'sr_b', 'comments_left', 'comments_right']}),
        dict(name='div', attrs={'class': ['bottom_social', 'article_bottom']}),
        dict(name='table', attrs={'class': [
            'ap-mediabox-table', 'ap-htmltable-table',
            'ap-photogallery-table', 'ap-htmlfragment-table']}),
    ]

    # Every pattern below is replaced with the empty string.
    preprocess_regexps = [
        # opening of the story table, up to and including its story cell tag
        (re.compile(r'<table class="ap-story-table hnews hentry item".*?<td class="ap-story-td">',
                    re.DOTALL | re.IGNORECASE), lambda match: ''),
        # paragraphs that start with a bare www2.timesdispatch URL
        (re.compile(r'<p>\s*http://www2.timesdispatch.*?</p>',
                    re.DOTALL | re.IGNORECASE), lambda match: ''),
        # paragraphs that start with an image served from static2.dukecms
        (re.compile(r'<p>\s*<img src="http://static2.dukecms.*?</p>',
                    re.DOTALL | re.IGNORECASE), lambda match: ''),
        # paragraphs that start with a link to www2.timesdispatch
        (re.compile(r'<p>\s*<a href="http://www2.timesdispatch.*?</p>',
                    re.DOTALL | re.IGNORECASE), lambda match: ''),
        # strip <hr /> line break
        (re.compile(r'<hr.*?>', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
        # strip the "item-license" link, through "Use</a>" and the
        # character that follows it
        (re.compile(r'<a\s*rel="item-license.*?Use</a>.', re.DOTALL |
                    re.IGNORECASE), lambda match: ''),
        # strip the <small> block that starts with "Richmond Times-Dispatch"
        (re.compile(r'<small>\s*Richmond Times-Dispatch.*?</small>', re.DOTALL |
                    re.IGNORECASE), lambda match: ''),
    ]

    # Feeds as (title, URL) pairs.
    feeds = [
        ('News', 'http://www2.timesdispatch.com/list/feed/rss/news-archive'),
        ('Breaking News', 'http://www2.timesdispatch.com/list/feed/rss/breaking-news'),
        ('National News', 'http://www2.timesdispatch.com/list/feed/rss/national-news'),
        ('Local News', 'http://www2.timesdispatch.com/list/feed/rss/local-news'),
        ('Business', 'http://www2.timesdispatch.com/list/feed/rss/business'),
        ('Local Business', 'http://www2.timesdispatch.com/list/feed/rss/local-business'),
        ('Politics', 'http://www2.timesdispatch.com/list/feed/rss/politics'),
        ('Virginia Politics',
         'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
        ('History', 'http://www2.timesdispatch.com/feed/rss/special_section/news/history'),
        ('Sports', 'http://www2.timesdispatch.com/list/feed/rss/sports2'),
        ('Health', 'http://www2.timesdispatch.com/feed/rss/lifestyles/health_med_fit/'),
        ('Entertainment/Life', 'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
        ('Arts/Theatre',
         'http://www2.timesdispatch.com/feed/rss/entertainment/arts_theatre/'),
        ('Movies', 'http://www2.timesdispatch.com/list/feed/rss/movies'),
        ('Music', 'http://www2.timesdispatch.com/list/feed/rss/music'),
        ('Dining & Food', 'http://www2.timesdispatch.com/list/feed/rss/dining'),
        ('Home & Garden', 'http://www2.timesdispatch.com/list/feed/rss/home-and-garden/'),
        ('Travel', 'http://www2.timesdispatch.com/feed/rss/travel/'),
        ('Opinion', 'http://www2.timesdispatch.com/feed/rss/news/opinion/'),
        ('Editorials', 'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
        ('Columnists and Blogs',
         'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
        ('Opinion Columnists',
         'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
        ('Letters to the Editor',
         'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
        ('Traffic', 'http://www2.timesdispatch.com/list/feed/rss/traffic'),
        ('Drives', 'http://www2.timesdispatch.com/feed/rss/classifieds/transportation/'),
    ]

    def print_version(self, url):
        """Return the print-view URL for ``url``, or ``url`` itself.

        A URL that ends in ``-<4 to 10 digits>/`` is mapped to the
        "share-this/print" page for that article number.  Any other URL
        (the original code labels these "AP article, no print url") is
        returned unchanged.
        """
        found = re.search(r'-([0-9]{4,10})/$', url)
        if found is None:
            # No trailing article number, so there is no print URL to build.
            return url
        return ('http://www2.timesdispatch.com/member-center/share-this/print/?content=ar'
                + found.group(1))
|
@ -1,31 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Nadid <nadid.skywalker at gmail.com>'
|
||||
'''
|
||||
http://www.sinfest.net
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class SinfestBig(BasicNewsRecipe):
    """Recipe for the Sinfest comic (http://www.sinfest.net), read from a scraper-built RSS feed."""

    # --- Publication metadata ---
    title = 'Sinfest'
    __author__ = 'nadid'
    description = 'Sinfest'
    publisher = 'Tatsuya Ishida/Museworks'
    category = 'comic'
    language = 'en'
    encoding = 'utf-8'

    # --- Fetch settings ---
    oldest_article = 5
    max_articles_per_feed = 100
    reverse_article_order = False
    use_embedded_content = True
    no_stylesheets = True

    # Conversion metadata assembled from the attributes above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'SinFest', u'http://henrik.nyh.se/scrapers/sinfest.rss'),
    ]

    def get_article_url(self, article):
        """Return the feed entry's ``link`` value (``None`` when missing)."""
        link = article.get('link')
        return link
|
@ -1,39 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1278049615(BasicNewsRecipe):
    """Recipe for the Statesman, the Austin, Texas daily newspaper."""

    # --- Publication metadata ---
    title = u'Statesman'
    # Attribute name corrected from the misspelled ``pubisher``.
    publisher = 'http://www.statesman.com/'
    description = 'Austin Texas Daily Newspaper'
    category = 'News, Austin, Texas'
    __author__ = 'rty'
    language = 'en'
    masthead_url = "http://www.statesman.com/images/cmg-logo.gif"

    # --- Fetch settings ---
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}

    # Feeds as (title, URL) pairs.
    # NOTE(review): the "Life" and "Editorial" URLs spell the query parameter
    # differently from the others (includesubsection / includesubsections vs
    # includeSubSections) -- confirm whether the server cares.
    feeds = [
        (u'News',
         u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
        (u'Local', u'http://www.statesman.com/section-rss.do?source=local&includeSubSections=true'),
        (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
        (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
        (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
        (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true'),
    ]

    remove_tags = [
        dict(name='div', attrs={'id': 'cxArticleOptions'}),
        {'class': ['perma', 'comments', 'trail', 'share-buttons',
                   'toggle_show_on']},
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'cxArticleHeader'}),
        dict(name='div', attrs={'id': ['cxArticleBodyText',
                                       'content']}),
    ]
|
@ -1,53 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
utne.com
|
||||
'''
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Utne(BasicNewsRecipe):
    """Recipe for Utne reader (utne.com), preferring each article's print page."""

    # --- Publication metadata ---
    title = u'Utne reader'
    __author__ = 'Darko Miletic'
    description = 'News'
    language = 'en'
    cover_url = 'http://www.utne.com/images/template/logo.gif'

    # --- Fetch settings ---
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    remove_tags = [
        dict(name='a', attrs={'id': 'ctl00_blankmaster_lnkBanner'}),
        dict(name='object'),
    ]

    # Feeds as (title, URL) pairs.
    feeds = [
        (u'Politics', u'http://www.utne.com/rss/Politics.xml'),
        (u'Environment', u'http://www.utne.com/rss/Environment.xml'),
        (u'Media', u'http://www.utne.com/rss/Media.xml'),
        (u'Great writing', u'http://www.utne.com/rss/Great-Writing.xml'),
        (u'Science & Technology', u'http://www.utne.com/rss/Science-Technology.xml'),
        (u'Arts', u'http://www.utne.com/rss/Arts.xml'),
    ]

    def print_version(self, url):
        """Return the article's print link, falling back to ``url``.

        The page at ``url`` is downloaded with ``self.browser`` and searched
        for the anchor with id ``ctl00_defaultmaster_Blog_tools1_lnkPrint``.
        ``url`` is returned unchanged when that anchor is missing or has no
        ``href``.
        """
        raw = self.browser.open(url).read()
        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
        print_link = soup.find(
            'a', {'id': 'ctl00_defaultmaster_Blog_tools1_lnkPrint'})
        if print_link is None:
            return url
        # An anchor without an href used to raise KeyError here.
        href = print_link.get('href')
        return href if href else url

    def preprocess_html(self, soup):
        """Insert a UTF-8 Content-Type meta tag and drop the body's onload handler.

        Pages that lack a <head> or <body> element are left untouched for
        that step instead of raising.  Returns the same ``soup`` object.
        """
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        head = soup.head
        if head is not None:
            head.insert(0, mtag)
        body = soup.body
        if body is not None:
            del body['onload']
        return soup
|
@ -1,116 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Starson17'
|
||||
'''
|
||||
www.nbcolympics.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Olympics_2010(BasicNewsRecipe):
    """Recipe for the www.nbcolympics.com news feeds (2010 Olympics)."""

    # --- Publication metadata ---
    title = u'NBC Olympics 2010'
    __author__ = 'Starson17'
    description = 'Olympics 2010'
    publisher = 'Olympics 2010'
    tags = 'Olympics news'
    language = 'en'
    cover_url = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'

    # --- Fetch settings ---
    # (``recursions = 3`` was present only as a disabled line; it stays unset.)
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    oldest_article = 7
    max_articles_per_feed = 10

    # The trailing space in 'Article ' is part of the original value.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['Article ', 'ArticleGallery']}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['RelatedTagsBox', 'ShareBox']}},
        {'name': 'div', 'attrs': {'class': [
            'DateUtilities', 'PhotoGallery BoxRight', 'Frame', 'ToolBox']}},
    ]

    # RSS feeds are at: http://www.nbcolympics.com/rss/index.html
    #
    # Only the feeds below are fetched.  The original list also carried
    # disabled entries for: photos (mostpopular, editorialpicks,
    # latestslideshows), Team USA slideshows and video, and, for every sport,
    # "mostpopular.xml" and "topnews.xml" alongside the "latestnews.xml" feed
    # used here.
    #
    # NOTE(review): "Speed Skating" uses sport=AS, the same code as
    # "Alpine Skiing" -- looks like a copy/paste slip; confirm the right code.
    feeds = [
        ('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
        ('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
        ('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
        ('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
        ('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
        ('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
        ('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
        ('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
        ('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
        ('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
        ('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
        ('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
        ('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
        ('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
        ('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
        ('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
        ('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
        ('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
        ('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
    ]

    # Stylesheet text, one rule per line, kept exactly as listed.
    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )
|
Loading…
x
Reference in New Issue
Block a user