diff --git a/recipes/accountancyage.recipe b/recipes/accountancyage.recipe deleted file mode 100644 index 3db45dce64..0000000000 --- a/recipes/accountancyage.recipe +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' -''' -www.accountancyage.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AccountancyAge(BasicNewsRecipe): - title = 'Accountancy Age' - __author__ = 'Darko Miletic' - description = 'business news' - publisher = 'accountancyage.com' - category = 'news, politics, finances' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - simultaneous_downloads = 1 - encoding = 'utf-8' - lang = 'en' - language = 'en' - - feeds = [ - (u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')] - - keep_only_tags = [ - dict(name='h1'), - dict(attrs={'class': 'article_content'}), - ] - - def get_article_url(self, article): - return article.get('guid', None) diff --git a/recipes/blues.recipe b/recipes/blues.recipe deleted file mode 100644 index 5ed7cef592..0000000000 --- a/recipes/blues.recipe +++ /dev/null @@ -1,27 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Oskar Kunicki ' -''' -Changelog: -2011-11-27 -News from BluesRSS.info -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BluesRSS(BasicNewsRecipe): - title = 'Blues News' - __author__ = 'Oskar Kunicki' - description = 'Blues news from around the world' - publisher = 'BluesRSS.info' - category = 'news, blues, USA,UK' - oldest_article = 5 - max_articles_per_feed = 100 - language = 'en' - cover_url = 'http://bluesrss.info/cover.jpg' - masthead_url = 'http://bluesrss.info/cover.jpg' - no_stylesheets = True - - remove_tags = [dict(name='div', attrs={'class': 'wp-pagenavi'})] - - feeds = [(u'News', u'http://bluesrss.info/feed/')] diff --git a/recipes/computer_active.recipe b/recipes/computer_active.recipe deleted file mode 100644 index c5d128cbb1..0000000000 --- a/recipes/computer_active.recipe +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python2 -__license__ = 'GPL v3' -__author__ = 'DrMerry Based on v1.01 by Lorenzo Vigentini' -__copyright__ = 'For version 1.02, 1.03: DrMerry' -__version__ = 'v1.03' -__date__ = '11, July 2011' - -''' -http://www.computeractive.co.uk/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class computeractive(BasicNewsRecipe): - __author__ = 'DrMerry' - description = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.' - cover_url = 'http://images.pcworld.com/images/common/header/header-logo.gif' - - title = 'Computer act!ve' - publisher = 'Incisive media' - category = 'PC, video, computing, product reviews, editing, cameras, production' - - language = 'en' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 7 - max_articles_per_feed = 25 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - remove_empty_feeds = True - remove_tags_after = dict(name='div', attrs={'class': 'article_tags_block'}) - - keep_only_tags = [ - dict(name='div', attrs={'id': 'container_left'}) - ] - - remove_tags = [ - dict(name='div', attrs={'id': ['seeAlsoTags', 'commentsModule', 'relatedArticles', - 'mainLeft', 'mainRight', 'recent_comment_block_parent', 'reviewDetails']}), - dict(name='div', attrs={'class': ['buyIt', 'detailMpu', 'small_section', 'recent_comment_block_parent', - 'title_right_button_fix', 'section_title.title_right_button_fix', 'common_button']}), - dict(name='a', attrs={'class': 'largerImage'}) - ] - - preprocess_regexps = [ - (re.compile(r'(]*>|)', re.DOTALL | re.IGNORECASE), - lambda match: ''), - ] - - feeds = [ - (u'General content', - u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'), - (u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'), - ] - - diff --git a/recipes/epicurious.recipe b/recipes/epicurious.recipe deleted file mode 100644 index da65ad34cd..0000000000 --- a/recipes/epicurious.recipe +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2010, Starson17' -''' -www.epicurious.com -''' -import re -from calibre.web.feeds.news import BasicNewsRecipe - - -class Epicurious(BasicNewsRecipe): - title = u'Epicurious' - __author__ = 'Starson17' - description = 'Food and Recipes from Epicurious' - cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg' - publisher = 'Epicurious' - tags = 'news, food, gourmet, recipes' - language = 'en' - use_embedded_content = False - no_stylesheets = True - remove_javascript = True - recursions = 3 - oldest_article = 14 - max_articles_per_feed = 20 - - keep_only_tags = [dict(name='div', attrs={'class': ['mainconsolewrapper', 'videoheader', 'content_unit', 'entry-content', 'see_more_block']}), - dict(name='div', attrs={'id': [ - 'headline', 'introBlock', 'ingredients', 'preparation', 'articleContent', 'in_categories_block']}) - ] - - remove_tags = [{'id': ['printShoppingList', 'addnoteLnk', 'btnUploadVideo', 'enlarge_image']}, - {'class': ['subLnk', 'sbmWrapper', 'detail_division', - 'entry-footer', 'comment-footer']}, - dict(name='div', attrs={'class': ['tagged', 'comments']}) - ] - - remove_tags_after = [dict(name='div', attrs={'class': 'entry-content'})] - - feeds = [ - (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'), - (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'), - (u'Features ', u'http://feeds.epicurious.com/latestfeatures'), - (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog') - ] - - match_regexps = [ - r'http://www.epicurious.com/.*recipes/.*/views' - ] - - preprocess_regexps = [ - (re.compile(r'/\n', re.DOTALL | re.IGNORECASE), lambda match: '/'), - (re.compile(r'_116.jpg', re.DOTALL | re.IGNORECASE), lambda match: '.jpg'), - (re.compile('
', - re.DOTALL | re.IGNORECASE), lambda match: '') - ] - - def postprocess_html(self, soup, first_fetch): - for t in soup.findAll(['table', 'tr', 'td']): - t.name = 'div' - return soup diff --git a/recipes/icons/blues.png b/recipes/icons/blues.png deleted file mode 100644 index b9d8650dc2..0000000000 Binary files a/recipes/icons/blues.png and /dev/null differ diff --git a/recipes/icons/linuxdevices.png b/recipes/icons/linuxdevices.png deleted file mode 100644 index 5cf1af6072..0000000000 Binary files a/recipes/icons/linuxdevices.png and /dev/null differ diff --git a/recipes/icons/moneynews.png b/recipes/icons/moneynews.png deleted file mode 100644 index 85185c7b38..0000000000 Binary files a/recipes/icons/moneynews.png and /dev/null differ diff --git a/recipes/icons/utne.png b/recipes/icons/utne.png deleted file mode 100644 index cb24ed5695..0000000000 Binary files a/recipes/icons/utne.png and /dev/null differ diff --git a/recipes/juventudrebelde_english.recipe b/recipes/juventudrebelde_english.recipe deleted file mode 100644 index 336ef7fd97..0000000000 --- a/recipes/juventudrebelde_english.recipe +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -juventudrebelde.co.cu -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Juventudrebelde_english(BasicNewsRecipe): - title = 'Juventud Rebelde in english' - __author__ = 'Darko Miletic' - description = 'The newspaper of Cuban Youth' - publisher = 'Juventud Rebelde' - category = 'news, politics, Cuba' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'iso-8859-1' - remove_javascript = True - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables' - ] - - html2epub_options = 'publisher="' + publisher + \ - '"\ncomments="' + description + '"\ntags="' + category + '"' - - keep_only_tags = [dict(name='div', attrs={'class': 'read'})] - - feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/')] - - def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0, mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup - - language = 'en' diff --git a/recipes/linuxdevices.recipe b/recipes/linuxdevices.recipe deleted file mode 100644 index 565e0b3c6e..0000000000 --- a/recipes/linuxdevices.recipe +++ /dev/null @@ -1,95 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -''' -Fetch Linuxdevices. -''' -import re -from calibre.web.feeds.news import BasicNewsRecipe - - -class LinuxDevices(BasicNewsRecipe): - - title = u'Linuxdevices' - description = 'News about Linux driven Hardware' - __author__ = 'Oliver Niesner' - use_embedded_content = False - timefmt = ' [%a %d %b %Y]' - max_articles_per_feed = 50 - no_stylesheets = True - language = 'en' - - remove_javascript = True - conversion_options = {'linearize_tables': True} - encoding = 'latin1' - - remove_tags_after = [dict(id='intelliTxt')] - filter_regexps = [r'ad\.doubleclick\.net'] - - remove_tags = [dict(name='div', attrs={'class': 'bannerSuperBanner'}), - dict(name='div', attrs={'class': 'bannerSky'}), - dict(name='div', attrs={'border': '0'}), - dict(name='div', attrs={'class': 'footerLinks'}), - dict(name='div', attrs={'class': 'seitenanfang'}), - dict(name='td', attrs={'class': 'mar5'}), - dict(name='table', attrs={'class': 'pageAktiv'}), - dict(name='table', attrs={'class': 'xartable'}), - dict(name='table', attrs={'class': 'wpnavi'}), - dict(name='table', attrs={'class': 'bgcontent absatz'}), - dict(name='table', attrs={'class': 'footer'}), - dict(name='table', attrs={'class': 'artikelBox'}), - dict(name='table', attrs={'class': 'kommentare'}), - dict(name='table', attrs={'class': 'pageBoxBot'}), - dict(name='table', attrs={'td': 'height="3"'}), - dict(name='table', attrs={'class': 'contentpaneopen'}), - dict(name='td', attrs={'nowrap': 'nowrap'}), - dict(name='td', attrs={'align': 'left'}), - dict(name='td', attrs={'height': '5'}), - dict(name='td', attrs={'class': 'ArticleWidgetsHeadline'}), - dict(name='div', attrs={ - 'class': 'artikelBox navigatorBox'}), - dict(name='div', attrs={'class': 'similar-article-box'}), - dict(name='div', attrs={'class': 'videoBigHack'}), - dict(name='td', attrs={'class': 'artikelDruckenRight'}), - dict(name='td', attrs={'class': 'width="200"'}), - dict(name='span', attrs={'class': 'content_rating'}), - dict(name='a', attrs={ - 'href': 'http://www.addthis.com/bookmark.php'}), - dict(name='a', attrs={'href': '/news'}), - dict(name='a', attrs={ - 'href': '/cgi-bin/survey/survey.cgi'}), - dict(name='a', attrs={ - 'href': '/cgi-bin/board/UltraBoard.pl'}), - dict(name='iframe'), - dict(name='form'), - dict(name='span', attrs={'class': 'hidePrint'}), - dict(id='ArticleWidgets'), - dict(id='headerLBox'), - dict(id='nointelliTXT'), - dict(id='rechteSpalte'), - dict(id='newsticker-list-small'), - dict(id='ntop5'), - dict(id='ntop5send'), - dict(id='ntop5commented'), - dict(id='nnav-bgheader'), - dict(id='nnav-headerteaser'), - dict(id='nnav-head'), - dict(id='nnav-top'), - dict(id='readcomment')] - - feeds = [(u'Linuxdevices', u'http://www.linuxfordevices.com/rss.xml')] - - def preprocess_html(self, soup): - match = re.compile(r"^Related") - for item in soup.findAll('b', text=match): - item.extract() - for item in soup.findAll(re.compile('^ul')): - item.extract() - for item in soup.findAll('br', limit=10): - item.extract() - return soup - - def postprocess_html(self, soup, first): - for tag in soup.findAll(name=['table', 'tr', 'td']): - tag.name = 'div' - return soup diff --git a/recipes/mac_video.recipe b/recipes/mac_video.recipe deleted file mode 100644 index e8ccab8248..0000000000 --- a/recipes/mac_video.recipe +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python2 -__license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini' -__copyright__ = '2009, Lorenzo Vigentini ' -__version__ = 'v1.01' -__date__ = '14, January 2010' - -''' -http://www.macvideo.tv/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ptempfile import PersistentTemporaryFile - -temp_files = [] -articles_are_obfuscated = True - - -class macVideo(BasicNewsRecipe): - __author__ = 'Lorenzo Vigentini' - description = 'MacVideo is an independent journal not affiliated with Apple Computer, It is a publication of IDG Communication focusing on video production and editing.' # noqa - cover_url = 'http://www.macvideo.tv/images/shared/macvideo-logo.jpg' - - title = 'MacVideo ' - publisher = 'IDG Communication' - category = 'Apple, Mac, video, computing, product reviews, editing, cameras, production' - - language = 'en' - encoding = 'cp1252' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 30 - max_articles_per_feed = 25 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True - - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url + '&print') - - response = br.follow_link(url, nr=0) - html = response.read() - - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name - - keep_only_tags = [ - dict(name='div', attrs={'id': 'mainContent'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['submissionBar', 'mpuContainer']}), - dict(name='p', attrs={'class': 'articlePag'}), - dict(name='ul', attrs={'id': 'articleIconsList'}) - ] - - feeds = [ - (u'News', u'http://www.macvideo.tv/rss/feeds/macvideo-news.xml'), - (u'Reviews', u'http://www.macvideo.tv/rss/feeds/macvideo-reviews.xml'), - (u'Interviews', u'http://www.macvideo.tv/rss/feeds/macvideo-features-interviews.xml'), - (u'Features', u'http://www.macvideo.tv/rss/feeds/macvideo-features-features.xml'), - (u'Rick Young', u'http://www.macvideo.tv/rss/feeds/blog100140.xml'), - (u'Matt Davis', u'http://www.macvideo.tv/rss/feeds/blog101658.xml'), - (u'Adrian Miskelly', - u'http://www.macvideo.tv/rss/feeds/blog101750.xml') - ] - diff --git a/recipes/moneynews.recipe b/recipes/moneynews.recipe deleted file mode 100644 index 323c2813a1..0000000000 --- a/recipes/moneynews.recipe +++ /dev/null @@ -1,47 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2011, Darko Miletic ' -''' -www.moneynews.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class MoneyNews(BasicNewsRecipe): - title = 'Moneynews.com' - __author__ = 'Darko Miletic' - description = 'Financial news worldwide' - publisher = 'Newsmax.com' - language = 'en' - category = 'news, finances, USA, business' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - feeds = [ - - (u'Street Talk', u'http://www.moneynews.com/rss/StreetTalk/8.xml'), - (u'Finance News', u'http://www.moneynews.com/rss/FinanceNews/4.xml'), - (u'Economy', u'http://www.moneynews.com/rss/Economy/2.xml'), - (u'Companies', u'http://www.moneynews.com/rss/Companies/6.xml'), - (u'Markets', u'http://www.moneynews.com/rss/Markets/7.xml'), - (u'Investing & Analysis', u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml') - ] - - keep_only_tags = [dict(name='div', attrs={'class': 'copy'})] - - remove_tags = [ - dict(attrs={'class': ['MsoNormal', 'MsoNoSpacing']}), - dict(name=['object', 'link', 'embed', 'form', 'meta']) - ] - - def print_version(self, url): - nodeid = url.rpartition('/')[2] - return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid diff --git a/recipes/open_left.recipe b/recipes/open_left.recipe deleted file mode 100644 index 91f3f0d425..0000000000 --- a/recipes/open_left.recipe +++ /dev/null @@ -1,23 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class OpenLeft(BasicNewsRecipe): - # Information about the recipe - - title = 'Open Left' - description = 'Progressive American commentary on current events' - category = 'news, commentary' - language = 'en' - __author__ = 'Xanthan Gum' - - # Fetch no article older than seven days - - oldest_article = 7 - - # Fetch no more than 100 articles - - max_articles_per_feed = 100 - - # Fetch the articles from the RSS feed - - feeds = [(u'Articles', u'http://www.openleft.com/rss/rss2.xml')] diff --git a/recipes/richmond_times_dispatch.recipe b/recipes/richmond_times_dispatch.recipe deleted file mode 100644 index a202295538..0000000000 --- a/recipes/richmond_times_dispatch.recipe +++ /dev/null @@ -1,114 +0,0 @@ -import re -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class RichmondTimesDispatch(BasicNewsRecipe): - title = u'Richmond Times-Dispatch' - description = "The Richmond Times-Dispatch is the primary daily newspaper in Richmond, \ - the capital of Virginia, United States, as well as the Virginia cities of Petersburg, \ - Chester. Hopewell, Colonial Heights, Charlottesville, Lynchburg, Waynesboro, \ - and is also a default paper for rural regions of the state. \ - The RTD has published in some form for more than 150 years." - __author__ = '_reader' - __date__ = '17 October 2012' - __version__ = '1.6' - cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png' - masthead_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png' - language = 'en' - oldest_article = 1.5 # days - max_articles_per_feed = 100 - ignore_duplicate_articles = {'title', 'url'} - needs_subscription = False - publisher = 'timesdispatch.com' - category = 'news, commentary' - tags = 'news' - publication_type = 'newspaper' - no_stylesheets = True - use_embedded_content = False - encoding = None - simultaneous_downloads = 20 - recursions = 0 - remove_javascript = True - remove_empty_feeds = True - auto_cleanup = False - - conversion_options = { - 'comments': description, - 'tags': tags, - 'language': language, - 'publisher': publisher, - 'authors': publisher, - 'smarten_punctuation': True - } - - remove_tags_before = dict(id='hnews hentry item') - - remove_tags_after = dict(name='hr') - - remove_tags = [ - dict(name='div', attrs={'id': ['mg_hd', 'mg_ft', 'sr_b', 'comments_left', 'comments_right']}), dict(name='div', attrs={'class': ['bottom_social', 'article_bottom']}), dict( name='table', attrs={'class': ['ap-mediabox-table', 'ap-htmltable-table', 'ap-photogallery-table', 'ap-htmlfragment-table']}) # noqa - ] - - preprocess_regexps = [ - (re.compile(r'', - re.DOTALL | re.IGNORECASE), lambda match: ''), - (re.compile(r'

\s*http://www2.timesdispatch.*?

', - re.DOTALL | re.IGNORECASE), lambda match: ''), - (re.compile(r'

\s*', - re.DOTALL | re.IGNORECASE), lambda match: ''), - (re.compile(r'', re.DOTALL | re.IGNORECASE), - lambda match: ''), # strip


line break - (re.compile(r'.', re.DOTALL | - re.IGNORECASE), lambda match: ''), # strip
line break - (re.compile(r'\s*Richmond Times-Dispatch.*?', re.DOTALL | - re.IGNORECASE), lambda match: ''), # strip
line break - ] - - feeds = [ - ('News', 'http://www2.timesdispatch.com/list/feed/rss/news-archive'), - ('Breaking News', 'http://www2.timesdispatch.com/list/feed/rss/breaking-news'), - ('National News', 'http://www2.timesdispatch.com/list/feed/rss/national-news'), - ('Local News', 'http://www2.timesdispatch.com/list/feed/rss/local-news'), - ('Business', 'http://www2.timesdispatch.com/list/feed/rss/business'), - ('Local Business', 'http://www2.timesdispatch.com/list/feed/rss/local-business'), - ('Politics', 'http://www2.timesdispatch.com/list/feed/rss/politics'), - ('Virginia Politics', - 'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'), - ('History', 'http://www2.timesdispatch.com/feed/rss/special_section/news/history'), - ('Sports', 'http://www2.timesdispatch.com/list/feed/rss/sports2'), - ('Health', 'http://www2.timesdispatch.com/feed/rss/lifestyles/health_med_fit/'), - ('Entertainment/Life', 'http://www2.timesdispatch.com/list/feed/rss/entertainment'), - ('Arts/Theatre', - 'http://www2.timesdispatch.com/feed/rss/entertainment/arts_theatre/'), - ('Movies', 'http://www2.timesdispatch.com/list/feed/rss/movies'), - ('Music', 'http://www2.timesdispatch.com/list/feed/rss/music'), - ('Dining & Food', 'http://www2.timesdispatch.com/list/feed/rss/dining'), - ('Home & Garden', 'http://www2.timesdispatch.com/list/feed/rss/home-and-garden/'), - ('Travel', 'http://www2.timesdispatch.com/feed/rss/travel/'), - ('Opinion', 'http://www2.timesdispatch.com/feed/rss/news/opinion/'), - ('Editorials', 'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'), - ('Columnists and Blogs', - 'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'), - ('Opinion Columnists', - 'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'), - ('Letters to the Editor', - 'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'), - ('Traffic', 'http://www2.timesdispatch.com/list/feed/rss/traffic'), - ('Drives', 'http://www2.timesdispatch.com/feed/rss/classifieds/transportation/'), - - ] - - def print_version(self, url): - article_num = re.sub(r'(^.*)\-([0-9]{4,10})\/$', r'\g<2>', url) - ap_pat = re.compile('http') - # print '\nDEBUG>>>>>>>>: article_num: ', article_num - # print 'DEBUG>>>>>>>>: ap_pat.search(article_num): ', - # ap_pat.search(article_num) - if ap_pat.search(article_num): # AP article, no print url - # print 'DEBUG>>>>>>>>: AP URL: ', url - return url - else: - printURL = 'http://www2.timesdispatch.com/member-center/share-this/print/?content=ar' + article_num - return printURL diff --git a/recipes/sinfest.recipe b/recipes/sinfest.recipe deleted file mode 100644 index e7be9e4bad..0000000000 --- a/recipes/sinfest.recipe +++ /dev/null @@ -1,31 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Nadid ' -''' -http://www.sinfest.net -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SinfestBig(BasicNewsRecipe): - title = 'Sinfest' - __author__ = 'nadid' - description = 'Sinfest' - reverse_article_order = False - oldest_article = 5 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = True - encoding = 'utf-8' - publisher = 'Tatsuya Ishida/Museworks' - category = 'comic' - language = 'en' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - feeds = [(u'SinFest', u'http://henrik.nyh.se/scrapers/sinfest.rss')] - - def get_article_url(self, article): - return article.get('link') diff --git a/recipes/statesman.recipe b/recipes/statesman.recipe deleted file mode 100644 index 36f36a26ae..0000000000 --- a/recipes/statesman.recipe +++ /dev/null @@ -1,39 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1278049615(BasicNewsRecipe): - title = u'Statesman' - pubisher = 'http://www.statesman.com/' - description = 'Austin Texas Daily Newspaper' - category = 'News, Austin, Texas' - __author__ = 'rty' - oldest_article = 3 - - max_articles_per_feed = 100 - - feeds = [(u'News', - u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'), - (u'Local', u'http://www.statesman.com/section-rss.do?source=local&includeSubSections=true'), - (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'), - (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'), - (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'), - (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true') - ] - masthead_url = "http://www.statesman.com/images/cmg-logo.gif" - - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - language = 'en' - encoding = 'utf-8' - conversion_options = {'linearize_tables': True} - remove_tags = [ - dict(name='div', attrs={'id': 'cxArticleOptions'}), - {'class': ['perma', 'comments', 'trail', 'share-buttons', - 'toggle_show_on']}, - ] - keep_only_tags = [ - dict(name='div', attrs={'class': 'cxArticleHeader'}), - dict(name='div', attrs={'id': ['cxArticleBodyText', - 'content']}), - ] diff --git a/recipes/utne.recipe b/recipes/utne.recipe deleted file mode 100644 index 946cf361d3..0000000000 --- a/recipes/utne.recipe +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -utne.com -''' - -from calibre.ebooks.BeautifulSoup import BeautifulSoup -from calibre.web.feeds.news import BasicNewsRecipe - - -class Utne(BasicNewsRecipe): - title = u'Utne reader' - __author__ = 'Darko Miletic' - description = 'News' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - language = 'en' - - cover_url = 'http://www.utne.com/images/template/logo.gif' - - remove_tags = [ - dict(name='a', attrs={'id': 'ctl00_blankmaster_lnkBanner'}), dict( - name='object') - ] - - feeds = [ - - (u'Politics', u'http://www.utne.com/rss/Politics.xml'), - (u'Environment', u'http://www.utne.com/rss/Environment.xml'), - (u'Media', u'http://www.utne.com/rss/Media.xml'), - (u'Great writing', u'http://www.utne.com/rss/Great-Writing.xml'), - (u'Science & Technology', u'http://www.utne.com/rss/Science-Technology.xml'), - (u'Arts', u'http://www.utne.com/rss/Arts.xml') - ] - - def print_version(self, url): - raw = self.browser.open(url).read() - soup = BeautifulSoup(raw.decode('utf8', 'replace')) - print_link = soup.find( - 'a', {'id': 'ctl00_defaultmaster_Blog_tools1_lnkPrint'}) - if print_link is None: - return url - return print_link['href'] - - def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0, mtag) - del(soup.body['onload']) - return soup diff --git a/recipes/winter_olympics.recipe b/recipes/winter_olympics.recipe deleted file mode 100644 index c533e656e6..0000000000 --- a/recipes/winter_olympics.recipe +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2010, Starson17' -''' -www.nbcolympics.com -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Olympics_2010(BasicNewsRecipe): - title = u'NBC Olympics 2010' - __author__ = 'Starson17' - description = 'Olympics 2010' - cover_url = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg' - publisher = 'Olympics 2010' - tags = 'Olympics news' - language = 'en' - use_embedded_content = False - no_stylesheets = True - remove_javascript = True - # recursions = 3 - oldest_article = 7 - max_articles_per_feed = 10 - - keep_only_tags = [dict(name='div', attrs={'class': ['Article ', 'ArticleGallery']}), - ] - - remove_tags = [dict(name='div', attrs={'id': ['RelatedTagsBox', 'ShareBox']}), - dict(name='div', attrs={ - 'class': ['DateUtilities', 'PhotoGallery BoxRight', 'Frame', 'ToolBox']}), - ] - - # RSS feeds are at: http://www.nbcolympics.com/rss/index.html - feeds = [ - ('NBCOlympics.com - News', - 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'), - ('NBCOlympics.com - News - Top Stories', - 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'), - ('NBCOlympics.com - News - Latest Headlines', - 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'), - # ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'), - # ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'), - # ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'), - ('NBCOlympics.com - Team USA - Latest news', - 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'), - # ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'), - # ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'), - # ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'), - # ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'), - ('NBCOlympics.com - Alpine Skiing - Latest News', - 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'), - # ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'), - # ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'), - ('NBCOlympics.com - Biathlon - Latest News', - 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'), - # ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'), - # ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'), - ('NBCOlympics.com - Bobsled - Latest News', - 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'), - # ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'), - # ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'), - ('NBCOlympics.com - Cross-Country - Latest News', - 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'), - # ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'), - # ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'), - ('NBCOlympics.com - Curling - Latest News', - 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'), - # ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'), - # ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'), - ('NBCOlympics.com - Figure Skating - Latest News', - 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'), - # ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'), - # ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'), - ('NBCOlympics.com - Freestyle Skiing - Latest News', - 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'), - # ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'), - # ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'), - ('NBCOlympics.com - Hockey - Latest News', - 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'), - # ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'), - # ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'), - ('NBCOlympics.com - Luge - Latest News', - 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'), - # ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'), - # ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'), - ('NBCOlympics.com - Nordic Combined - Latest News', - 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'), - # ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'), - # ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'), - ('NBCOlympics.com - Short Track - Latest News', - 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'), - # ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'), - # ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'), - ('NBCOlympics.com - Skeleton - Latest News', - 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'), - # ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'), - # ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'), - ('NBCOlympics.com - Ski Jumping - Latest News', - 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'), - # ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'), - # ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'), - ('NBCOlympics.com - Snowboarding - Latest News', - 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'), - # ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'), - # ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'), - ('NBCOlympics.com - Speed Skating - Latest News', - 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'), - ] - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - '''