diff --git a/recipes/al_ahram.recipe b/recipes/al_ahram.recipe deleted file mode 100644 index d22f3331ee..0000000000 --- a/recipes/al_ahram.recipe +++ /dev/null @@ -1,76 +0,0 @@ -# coding=utf-8 -__license__ = 'GPL v3' -__copyright__ = '2011-2016, Hassan Williamson ' -''' -ahram.org.eg -''' -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AlAhram(BasicNewsRecipe): - title = u'Al-Ahram (الأهرام)' - __author__ = 'Hassan Williamson' - description = 'The Arabic version of the Al-Ahram newspaper.' - language = 'ar' - encoding = 'utf8' - cover_url = 'http://www.ahram.org.eg/Media/News/2015/3/14/2015-635619650946000713-600.jpg' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - publisher = 'Al-Ahram' - category = 'News' - publication_type = 'newsportal' - - extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .bbtitle{ font-weight: bold; font-size: 2em; } .bbsubtitle{ font-size: 1.3em; } #WriterImage{ height: 10px; } ' # noqa - - keep_only_tags = [ - dict(name='div', attrs={'class': ['bbcolright']}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['bbnav', 'bbsp']}), - dict(name='div', attrs={'id': ['AddThisButton']}), - dict(name='a', attrs={'class': ['twitter-share-button']}), - dict(name='div', attrs={'id': ['ReaderCount']}), - ] - - remove_attributes = [ - 'width', 'height', 'style' - ] - - feeds = [ - (u'الأولى', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=25'), - (u'الصفحة الثانية', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=74'), - (u'مصر', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=27'), - (u'المشهد السياسي', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=60'), - (u'المحافظات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=29'), - (u'الوطن العربي', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=31'), - (u'العالم', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=26'), - (u'تقارير المراسلين', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=2'), - (u'تحقيقات', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=3'), - (u'قضايا واراء', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=4'), - (u'اقتصاد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=5'), - (u'رياضة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=6'), - (u'حوادث', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=38'), - (u'دنيا الثقافة', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=7'), - (u'المراة والطفل', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=8'), - (u'يوم جديد', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=9'), - (u'الكتاب', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=10'), - (u'الاعمدة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=11'), - (u'أراء حرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=59'), - (u'ملفات الاهرام', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=12'), - (u'بريد الاهرام', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=15'), - (u'برلمان الثورة', - 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=61'), - (u'الاخيرة', 'http://www.ahram.org.eg/archive/RssXml.aspx?CategoryID=16'), - ] diff --git a/recipes/albertslundlokalavisen_dk.recipe b/recipes/albertslundlokalavisen_dk.recipe deleted file mode 100644 index 55d2c26714..0000000000 --- a/recipes/albertslundlokalavisen_dk.recipe +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python2 -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import unicode_literals, division, absolute_import, print_function -from calibre.web.feeds.news import BasicNewsRecipe -''' -Albertslund Posten -''' - - -class AlbertslundLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Albertslund Posten' - description = ('RSS feed med sidste nyt fra Albertslund Posten. Der er nye historier flere gange dagligt' - ' - få de seneste nyheder fra dit lokalområde automatisk. Albertslund Posten. albertslund.lokalavisen.dk') - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 25 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Albertslund Posten', 'http://albertslund.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Albertslund Posten', 'http://albertslund.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Albertslund Posten', 'http://albertslund.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Albertslund Posten', 'http://albertslund.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Albertslund Posten', 'http://albertslund.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Albertslund Posten', 'http://albertslund.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/banat_news.recipe b/recipes/banat_news.recipe deleted file mode 100644 index d8b2592b89..0000000000 --- a/recipes/banat_news.recipe +++ /dev/null @@ -1,71 +0,0 @@ - -''' -www.philstar.com -''' - -import time -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class BanatNews(BasicNewsRecipe): - title = 'Banat News' - custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p') - __author__ = 'jde' - __date__ = '31 May 2012' - __version__ = '1.0' - description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.' # noqa - language = 'ceb' - publisher = 'The Philippine STAR' - category = 'news, Philippines' - tags = 'news, Philippines' - cover_url = 'http://www.philstar.com/images/logo_Banat.jpg' - masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg' - oldest_article = 1.5 # days - max_articles_per_feed = 25 - simultaneous_downloads = 10 - publication_type = 'newspaper' - timefmt = ' [%a, %d %b %Y %I:%M %p]' - no_stylesheets = True - use_embedded_content = False - encoding = None - recursions = 0 - needs_subscription = False - remove_javascript = True - remove_empty_feeds = True - auto_cleanup = False - - remove_tags = [dict(name='img', attrs={'id': 'Image1'}) # Logo - # Section (Headlines, Nation, Metro, ...) - # Comments - # View Comments - # Zoom - , dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}), dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}), dict(name='img', attrs={'src': 'images/post-comments.jpg'}), dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}) # noqa - ] - conversion_options = {'title': custom_title, - 'comments': description, - 'tags': tags, - 'language': language, - 'publisher': publisher, - 'authors': publisher, - 'smarten_punctuation': True - } - - feeds = [ - - ('Balita' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101'), - ('Opinyon' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102'), - ('Kalingawan' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104'), - ('Showbiz' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62'), - ('Palaro' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103'), - ('Imong Kapalaran' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105') - ] - -# process the printer friendly version of article - def print_version(self, url): - return url.replace('/Article', '/ArticlePrinterFriendly') - -# obtain title from printer friendly version of article; avoiding -# add_toc_thumbnail changing title when article has image - def populate_article_metadata(self, article, soup, first): - article.title = soup.find( - 'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip() diff --git a/recipes/ciekawostki_historyczne.recipe b/recipes/ciekawostki_historyczne.recipe deleted file mode 100644 index 7df7b61e9d..0000000000 --- a/recipes/ciekawostki_historyczne.recipe +++ /dev/null @@ -1,48 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class Ciekawostki_Historyczne(BasicNewsRecipe): - title = u'Ciekawostki Historyczne' - oldest_article = 7 - __author__ = u'fenuks & Tomasz Długosz' - description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.' - category = 'history' - language = 'pl' - masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg' - cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg' - max_articles_per_feed = 100 - extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}' - oldest_article = 12 - preprocess_regexps = [(re.compile(u'Ten artykuł ma kilka stron.*?', re.DOTALL), - lambda match: ''), (re.compile(u'

Zobacz też:

.*?', re.DOTALL), lambda match: '')] - no_stylesheets = True - remove_empty_feeds = True - keep_only_tags = [dict(name='div', attrs={'class': 'post'})] - recursions = 5 - remove_tags = [dict(id=['catapult-cookie-bar','header','footer','rightcolumn','singlepostinfo']), dict( - attrs={'class': ['ubm_banner','ciekawostki-slider-popular','books short floatRight', 'unprintable', 'booksTable', 'bawmrp']})] - - feeds = [ - (u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'), - (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'), - (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'), - (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'), - - (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'), - (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'), - (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'), - (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/')] - - def is_link_wanted(self, url, tag): - return 'ciekawostkihistoryczne' in url and url[-2] in {'2', '3', '4', '5', '6'} - - def postprocess_html(self, soup, first_fetch): - tag = soup.find('h7') - if tag: - tag.nextSibling.extract() - if not first_fetch: - for r in soup.findAll(['h1']): - r.extract() - soup.find('h6').nextSibling.extract() - return soup diff --git a/recipes/computing_uk.recipe b/recipes/computing_uk.recipe deleted file mode 100644 index 97b8b53c8a..0000000000 --- a/recipes/computing_uk.recipe +++ /dev/null @@ -1,123 +0,0 @@ -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2018, Darko Miletic ' -''' -www.computing.co.uk -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Computing_UK(BasicNewsRecipe): - title = 'Computing' - __author__ = 'Darko Miletic' - description = 'Computing is the leading information resource for UK technology decision makers, providing the latest market news and hard-hitting opinion.' - publisher = 'Incisive Business Media Limited' - category = 'it computing uk, computing events, big data summit, cloud and infrastructure, it devops, computing security, HP, intel' - oldest_article = 7 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en_GB' - remove_empty_feeds = True - publication_type = 'newsportal' - auto_cleanup = True - resolve_internal_links = True - needs_subscription = True - ignore_duplicate_articles = {'url'} - INDEX = 'https://www.computing.co.uk/' - LOGIN = 'https://www.computing.co.uk/userlogin' - - def get_browser(self): - - def is_form_login(form): - return "id" in form.attrs and form.attrs['id'] == "userlogin" - - br = BasicNewsRecipe.get_browser(self) - br.open(self.INDEX) - if self.username: - br.open(self.LOGIN) - br.select_form(predicate=is_form_login) - br['subscriber[email_id]'] = self.username - br['subscriber[password]'] = self.password - br.submit() - return br - - extra_css = """ - body{font-family: sans-serif} - img{margin-top:1em; margin-bottom: 1em; display:block} - """ - - conversion_options = { - 'comment': description, - 'tags': category, - 'publisher': publisher, - 'language': language - } - - feeds = [ - ( - u'Financial Solutions', - u'https://www.computing.co.uk/feeds/rss/category/financial-solutions/' - ), - ( - u'Big Data', - u'https://www.computing.co.uk/feeds/rss/category/big-data-and-analytics/' - ), - (u'DevOps', u'https://www.computing.co.uk/feeds/rss/category/devops/'), - ( - u'Cloud and Infrastructure', - u'https://www.computing.co.uk/feeds/rss/category/cloud-and-infrastructure/' - ), - ( - u'Internet of Things', - u'https://www.computing.co.uk/feeds/rss/category/internet-of-things/' - ), - ( - u'Leadership', - u'https://www.computing.co.uk/feeds/rss/category/leadership/' - ), - ( - u'Application', - u'https://www.computing.co.uk/feeds/rss/category/software/applications/' - ), - ( - u'Business Software', - u'https://www.computing.co.uk/feeds/rss/category/software/business-software/' - ), - ( - u'Developer', - u'https://www.computing.co.uk/feeds/rss/category/software/developer/' - ), - ( - u'Mobile Software', - u'https://www.computing.co.uk/feeds/rss/category/software/mobile-software/' - ), - (u'Strategy', u'https://www.computing.co.uk/feeds/rss/category/strategy/'), - ( - u'Corporate', - u'https://www.computing.co.uk/feeds/rss/category/management/corporate/' - ), - ( - u'Privacy', - u'https://www.computing.co.uk/feeds/rss/category/security/privacy/' - ), - (u'Security', u'https://www.computing.co.uk/feeds/rss/category/security/'), - (u'Hardware', u'https://www.computing.co.uk/feeds/rss/category/hardware/'), - ( - u'Mobile Phones', - u'https://www.computing.co.uk/feeds/rss/category/hardware/mobile-phones/' - ), - ( - u'Communications', - u'https://www.computing.co.uk/feeds/rss/category/communications/' - ), - ( - u'Public Sector', - u'https://www.computing.co.uk/feeds/rss/category/public-sector/' - ), - (u'Security', u'https://www.computing.co.uk/feeds/rss/category/security/'), - (u'Security', u'https://www.computing.co.uk/feeds/rss/category/security/'), - ] diff --git a/recipes/halsnaeslokalavisen_dk.recipe b/recipes/halsnaeslokalavisen_dk.recipe deleted file mode 100644 index c77b6b7387..0000000000 --- a/recipes/halsnaeslokalavisen_dk.recipe +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python2 -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import unicode_literals, division, absolute_import, print_function -from calibre.web.feeds.news import BasicNewsRecipe -''' -Halsnæs Avis -''' - - -class HalsnaesLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Halsnæs Avis' - description = 'Lokale og regionale nyheder, sport og kultur fra Halsnæs og omegn på halsnaes.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Halsnæs Avis', 'http://halsnaes.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Halsnæs Avis', 'http://halsnaes.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Halsnæs Avis', 'http://halsnaes.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Halsnæs Avis', 'http://halsnaes.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Halsnæs Avis', 'http://halsnaes.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Halsnæs Avis', 'http://halsnaes.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/heritage_foundation.recipe b/recipes/heritage_foundation.recipe deleted file mode 100644 index 66e0ec1464..0000000000 --- a/recipes/heritage_foundation.recipe +++ /dev/null @@ -1,81 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class HeritageFoundation(BasicNewsRecipe): - title = u'The Heritage Foundation' - description = 'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—\ -whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, \ -individual freedom, traditional American values, and a strong national defense.' - __author__ = '_reader' - __date__ = '05 July 2012' - __version__ = '1.0' - oldest_article = 30 - max_articles_per_feed = 100 - publisher = 'The Heritage Foundation' - category = 'commentary' - tags = 'commentary' - language = 'en' - publication_type = 'blog' - cover_url = 'http://www.heritage.org/static/images/logo.jpg' - masthead_url = 'http://www.heritage.org/static/images/logo.jpg' - encoding = None - use_embedded_content = False - no_stylesheets = True - remove_javascript = True - recursions = 0 - remove_empty_feeds = True - auto_cleanup = True - - conversion_options = { - 'comments': description, - 'tags': tags, - 'language': language, - 'publisher': publisher, - 'authors': publisher, - 'smarten_punctuation': True - } - - feeds = [ - (u'Agriculture', u'http://origin.heritage.org/static/RSS/Agriculture.xml'), - (u'Alliances', u'http://origin.heritage.org/static/RSS/Alliances.xml'), - (u'Arms Control and Non-Proliferation', - u'http://origin.heritage.org/static/RSS/Arms-Control-and-Non-Proliferation.xml'), - (u'Budget and Spending', - u'http://origin.heritage.org/static/RSS/Budget-and-Spending.xml'), - (u'Economic Freedom', u'http://origin.heritage.org/static/RSS/Economic-Freedom.xml'), - (u'Economy', u'http://origin.heritage.org/static/RSS/Economy.xml'), - (u'Education', u'http://origin.heritage.org/static/RSS/Education.xml'), - (u'Energy and Environment', - u'http://origin.heritage.org/static/RSS/Energy-and-Environment.xml'), - (u'Family and Marriage', - u'http://origin.heritage.org/static/RSS/Family-And-Marriage.xml'), - (u'Foreign Aid and Development', - u'http://origin.heritage.org/static/RSS/Foreign-Aid-and-Development.xml'), - (u'Health Care', u'http://origin.heritage.org/static/RSS/Health-Care.xml'), - (u'Homeland Security', u'http://origin.heritage.org/static/RSS/Homeland-Security.xml'), - (u'Housing', u'http://origin.heritage.org/static/RSS/Housing.xml'), - (u'Immigration', u'http://origin.heritage.org/static/RSS/Immigration.xml'), - (u'International Conflicts', - u'http://origin.heritage.org/static/RSS/International-Conflicts.xml'), - (u'International Law', u'http://origin.heritage.org/static/RSS/International-Law.xml'), - (u'Labor', u'http://origin.heritage.org/static/RSS/Labor.xml'), - (u'Legal Issues', u'http://origin.heritage.org/static/RSS/Legal.xml'), - (u'Missile Defense', u'http://origin.heritage.org/static/RSS/Missile-Defense.xml'), - (u'National Security and Defense', - u'http://origin.heritage.org/static/RSS/National-Security-and-Defense.xml'), - (u'Political Thought', u'http://origin.heritage.org/static/RSS/Political-Thought.xml'), - (u'Public Diplomacy', u'http://origin.heritage.org/static/RSS/Public-Diplomacy.xml'), - (u'Regulation', u'http://origin.heritage.org/static/RSS/Regulation.xml'), - (u'Religion and Civil Society', - u'http://origin.heritage.org/static/RSS/Religion-and-Civil-Society.xml'), - (u'Retirement Security', - u'http://origin.heritage.org/static/RSS/Retirement-Security.xml'), - (u'Space Policy', u'http://origin.heritage.org/static/RSS/Space-Policy.xml'), - (u'Taxes', u'http://origin.heritage.org/static/RSS/Taxes.xml'), - (u'Terrorism', u'http://origin.heritage.org/static/RSS/Terrorism.xml'), - (u'Trade', u'http://origin.heritage.org/static/RSS/Trade.xml'), - (u'Transportation', u'http://origin.heritage.org/static/RSS/Transportation.xml'), - (u'Welfare', u'http://origin.heritage.org/static/RSS/Welfare.xml'), - (u'Worldwide Freedom and Human Rights', - u'http://origin.heritage.org/static/RSS/Worldwide-Freedom-and-Human-Rights.xml'), - ] diff --git a/recipes/historias_del_mundo_es.recipe b/recipes/historias_del_mundo_es.recipe deleted file mode 100644 index 44e49db37d..0000000000 --- a/recipes/historias_del_mundo_es.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# vim:fileencoding=utf-8 -from __future__ import unicode_literals -from calibre.web.feeds.news import BasicNewsRecipe - - -class HistoriasDelMundo (BasicNewsRecipe): - __author__ = 'Marc Busqué ' - __url__ = 'http://www.lamarciana.com' - __version__ = '1.0.1' - __license__ = 'GPL v3' - __copyright__ = '2012, Marc Busqué ' - title = u'Historias del Mundo' - description = u'Historias del Mundo contadas por Marc Busqué' - url = 'http://www.marcbusque.org' - language = 'es' - tags = 'viajes, social' - oldest_article = 120 - remove_empty_feeds = True - no_stylesheets = True - cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png' - - def get_extra_css(self): - if not self.extra_css: - br = self.get_browser() - self.extra_css = br.open_novisit( - 'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read().replace('@charset "UTF-8";', '') - return self.extra_css - - feeds = [ - (u'Historias del Mundo', u'http://www.marcbusque.org/?feed=rss'), - ] diff --git a/recipes/histories_del_mon_ca.recipe b/recipes/histories_del_mon_ca.recipe deleted file mode 100644 index a9cf4e74bf..0000000000 --- a/recipes/histories_del_mon_ca.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# vim:fileencoding=utf-8 -from __future__ import unicode_literals -from calibre.web.feeds.news import BasicNewsRecipe - - -class HistoriesDelMon (BasicNewsRecipe): - __author__ = 'Marc Busqué ' - __url__ = 'http://www.lamarciana.com' - __version__ = '1.0.1' - __license__ = 'GPL v3' - __copyright__ = '2012, Marc Busqué ' - title = u'Històries del Món' - description = u'Històries del Món explicades pel Marc Busqué' - url = 'http://www.marcbusque.org' - language = 'ca' - tags = 'viatges, social' - oldest_article = 120 - remove_empty_feeds = True - no_stylesheets = True - cover_url = u'http://www.marcbusque.org/wp-content/uploads/2011/12/cuchitril.png' - - def get_extra_css(self): - if not self.extra_css: - br = self.get_browser() - self.extra_css = br.open_novisit( - 'https://raw.githubusercontent.com/laMarciana/gutenweb/master/dist/gutenweb.css').read().replace('@charset "UTF-8";', '') - return self.extra_css - - feeds = [ - (u'Històries del Món', u'http://www.marcbusque.org/ca/feed/'), - ] diff --git a/recipes/kurier_galicyjski.recipe b/recipes/kurier_galicyjski.recipe deleted file mode 100644 index e51bae27ec..0000000000 --- a/recipes/kurier_galicyjski.recipe +++ /dev/null @@ -1,74 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs, Comment - - -class KurierGalicyjski(BasicNewsRecipe): - title = u'Kurier Galicyjski' - __author__ = 'fenuks' - description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.' # noqa - category = 'news' - language = 'pl' - cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - no_stylesheets = True - keep_only_tags = [dict(attrs={'class': 'item-page'})] - remove_tags = [dict(attrs={'class': 'pagenav'}), dict(attrs={ - 'style': 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'})] # noqa - feeds = [ - (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'), - (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'), - (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'), - (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'), - (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'), - (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'), - (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'), - (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'), - (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'), - (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'), - (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom')] - - def append_page(self, soup, appendtag): - pager = soup.find(id='article-index') - if pager: - pager = pager.findAll('a')[1:] - if pager: - for a in pager: - nexturl = 'http://www.kuriergalicyjski.com' + a['href'] - soup2 = self.index_to_soup(nexturl) - pagetext = soup2.find(attrs={'class': 'item-page'}) - if pagetext.h2: - pagetext.h2.extract() - r = pagetext.find(attrs={'class': 'article-info'}) - if r: - r.extract() - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pos = len(appendtag.contents) - for r in appendtag.findAll(id='article-index'): - r.extract() - for r in appendtag.findAll(attrs={'class': 'pagenavcounter'}): - r.extract() - for r in appendtag.findAll(attrs={'class': 'pagination'}): - r.extract() - for r in appendtag.findAll(attrs={'class': 'pagenav'}): - r.extract() - for r in appendtag.findAll(attrs={'style': 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'}): # noqa - r.extract() - comments = appendtag.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - for r in soup.findAll(style=True): - del r['style'] - for img in soup.findAll(attrs={'class': 'easy_img_caption smartresize'}): - img.insert(len(img.contents) - 1, bs('
')) - img.insert(len(img.contents), bs('

')) - for a in soup.findAll('a', href=True): - if a['href'].startswith('/'): - a['href'] = 'http://kuriergalicyjski.com' + a['href'] - return soup diff --git a/recipes/national_geographic_pl.recipe b/recipes/national_geographic_pl.recipe deleted file mode 100644 index 3b3c0bc989..0000000000 --- a/recipes/national_geographic_pl.recipe +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = 'Marcin Urban 2011' - -import re -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class recipeMagic(BasicNewsRecipe): - title = 'National Geographic PL' - __author__ = 'Marcin Urban 2011' - __modified_by__ = 'fenuks' - description = u'Legenda wśród magazynów z historią sięgającą 120 lat' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publisher = 'G+J Gruner+Jahr Polska' - category = 'news, PL,' - language = 'pl' - remove_empty_feeds = True - publication_type = 'newsportal' - extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} - h1{text-align: center;} - h2{font-size: medium; font-weight: bold;} - .authordate {font-size: small; color: #696969;} - p.lead {font-weight: bold; text-align: center;} - .fot{font-size: x-small; color: #666666;} ''' - preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - - remove_tags = [ - dict(name='div', attrs={'class': 'add_inf'}), - dict(name='div', attrs={'class': 'add_f'}), - ] - - remove_attributes = ['width', 'height'] - feeds = [] - - def find_articles(self, url): - articles = [] - soup = self.index_to_soup(url) - tag = soup.find(attrs={'class': 'arl'}) - if not tag: - return articles - art = tag.ul.findAll('li') - for i in art: - title = i.a['title'] - url = i.a['href'] - # date=soup.find(id='footer').ul.li.string[41:-1] - desc = i.div.p.string - articles.append({'title': title, - 'url': url, - 'date': '', - 'description': desc - }) - return articles - - def parse_index(self): - feeds = [] - feeds.append((u"Aktualności", self.find_articles( - 'http://www.national-geographic.pl/aktualnosci/'))) - feeds.append((u"Artykuły", self.find_articles( - 'http://www.national-geographic.pl/artykuly/'))) - - return feeds - - def print_version(self, url): - if 'artykuly' in url: - return url.replace('artykuly/pokaz', 'drukuj-artykul') - elif 'aktualnosci' in url: - return url.replace('aktualnosci/pokaz', 'drukuj-artykul') - else: - return url - - def get_cover_url(self): - soup = self.index_to_soup( - 'http://www.national-geographic.pl/biezace-wydania/') - tag = soup.find(attrs={'class': 'txt jus'}) - self.cover_url = tag.img['src'] - return getattr(self, 'cover_url', self.cover_url)