diff --git a/recipes/icons/rabble_ca.png b/recipes/icons/rabble_ca.png deleted file mode 100644 index 3d471ac652..0000000000 Binary files a/recipes/icons/rabble_ca.png and /dev/null differ diff --git a/recipes/icons/radikal_tr.png b/recipes/icons/radikal_tr.png deleted file mode 100644 index 2684c85ed2..0000000000 Binary files a/recipes/icons/radikal_tr.png and /dev/null differ diff --git a/recipes/icons/radio_praha.png b/recipes/icons/radio_praha.png deleted file mode 100644 index c9f3e4f4ab..0000000000 Binary files a/recipes/icons/radio_praha.png and /dev/null differ diff --git a/recipes/icons/randerslokalavisen_dk.png b/recipes/icons/randerslokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/randerslokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/realitatea.png b/recipes/icons/realitatea.png deleted file mode 100644 index 66fb668cf7..0000000000 Binary files a/recipes/icons/realitatea.png and /dev/null differ diff --git a/recipes/icons/rebelion.png b/recipes/icons/rebelion.png deleted file mode 100644 index 4f9e2ddb2f..0000000000 Binary files a/recipes/icons/rebelion.png and /dev/null differ diff --git a/recipes/icons/red_aragon.png b/recipes/icons/red_aragon.png deleted file mode 100644 index c9a1d9aa9c..0000000000 Binary files a/recipes/icons/red_aragon.png and /dev/null differ diff --git a/recipes/icons/replicavedetelor.png b/recipes/icons/replicavedetelor.png deleted file mode 100644 index 5bcc29d2a7..0000000000 Binary files a/recipes/icons/replicavedetelor.png and /dev/null differ diff --git a/recipes/icons/republica.png b/recipes/icons/republica.png deleted file mode 100644 index 6135647482..0000000000 Binary files a/recipes/icons/republica.png and /dev/null differ diff --git a/recipes/icons/reuters_ja.png b/recipes/icons/reuters_ja.png deleted file mode 100644 index d9dcfecc8f..0000000000 Binary files a/recipes/icons/reuters_ja.png and /dev/null differ diff --git a/recipes/icons/revista_bla.png b/recipes/icons/revista_bla.png deleted file mode 100644 index df144fe27f..0000000000 Binary files a/recipes/icons/revista_bla.png and /dev/null differ diff --git a/recipes/icons/revista_cromos.png b/recipes/icons/revista_cromos.png deleted file mode 100644 index 300c09c333..0000000000 Binary files a/recipes/icons/revista_cromos.png and /dev/null differ diff --git a/recipes/icons/revista_piaui.png b/recipes/icons/revista_piaui.png deleted file mode 100644 index 67d63e6cfb..0000000000 Binary files a/recipes/icons/revista_piaui.png and /dev/null differ diff --git a/recipes/icons/revista_semana.png b/recipes/icons/revista_semana.png deleted file mode 100644 index 953260fda6..0000000000 Binary files a/recipes/icons/revista_semana.png and /dev/null differ diff --git a/recipes/icons/revista_summa.png b/recipes/icons/revista_summa.png deleted file mode 100644 index 0d00979c98..0000000000 Binary files a/recipes/icons/revista_summa.png and /dev/null differ diff --git a/recipes/icons/rga.png b/recipes/icons/rga.png deleted file mode 100644 index 5695c3ef66..0000000000 Binary files a/recipes/icons/rga.png and /dev/null differ diff --git a/recipes/icons/rheinische_post.png b/recipes/icons/rheinische_post.png deleted file mode 100644 index 9c6f5d224c..0000000000 Binary files a/recipes/icons/rheinische_post.png and /dev/null differ diff --git a/recipes/icons/rian_eng.png b/recipes/icons/rian_eng.png deleted file mode 100644 index 0e22ab4a9b..0000000000 Binary files a/recipes/icons/rian_eng.png and /dev/null differ diff --git a/recipes/icons/rian_spa.png b/recipes/icons/rian_spa.png deleted file mode 100644 index 0e22ab4a9b..0000000000 Binary files a/recipes/icons/rian_spa.png and /dev/null differ diff --git a/recipes/icons/roger_ebert.png b/recipes/icons/roger_ebert.png deleted file mode 100644 index fbc8fea941..0000000000 Binary files a/recipes/icons/roger_ebert.png and /dev/null differ diff --git a/recipes/icons/roger_ebert_blog.png b/recipes/icons/roger_ebert_blog.png deleted file mode 100644 index fbc8fea941..0000000000 Binary files a/recipes/icons/roger_ebert_blog.png and /dev/null differ diff --git a/recipes/icons/rollingstone.png b/recipes/icons/rollingstone.png deleted file mode 100644 index f7ec38c97b..0000000000 Binary files a/recipes/icons/rollingstone.png and /dev/null differ diff --git a/recipes/icons/romanialibera.png b/recipes/icons/romanialibera.png deleted file mode 100644 index ae9cef2dc6..0000000000 Binary files a/recipes/icons/romanialibera.png and /dev/null differ diff --git a/recipes/icons/roskildelokalavisen_dk.png b/recipes/icons/roskildelokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/roskildelokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/rubikon_de.png b/recipes/icons/rubikon_de.png deleted file mode 100644 index 872d9b7aaa..0000000000 Binary files a/recipes/icons/rubikon_de.png and /dev/null differ diff --git a/recipes/icons/rudersdallokalavisen_dk.png b/recipes/icons/rudersdallokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/rudersdallokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/rue89.png b/recipes/icons/rue89.png deleted file mode 100644 index d5c8812044..0000000000 Binary files a/recipes/icons/rue89.png and /dev/null differ diff --git a/recipes/icons/rusiahoy.png b/recipes/icons/rusiahoy.png deleted file mode 100644 index 629f9364ee..0000000000 Binary files a/recipes/icons/rusiahoy.png and /dev/null differ diff --git a/recipes/icons/rynek_infrastruktury.png b/recipes/icons/rynek_infrastruktury.png deleted file mode 100644 index b3500d48b2..0000000000 Binary files a/recipes/icons/rynek_infrastruktury.png and /dev/null differ diff --git a/recipes/icons/rynek_zdrowia.png b/recipes/icons/rynek_zdrowia.png deleted file mode 100644 index e406f785d6..0000000000 Binary files a/recipes/icons/rynek_zdrowia.png and /dev/null differ diff --git a/recipes/icons/sabit_fikir.png b/recipes/icons/sabit_fikir.png deleted file mode 100644 index 87438987aa..0000000000 Binary files a/recipes/icons/sabit_fikir.png and /dev/null differ diff --git a/recipes/icons/sage_news.png b/recipes/icons/sage_news.png deleted file mode 100644 index 7094c2125b..0000000000 Binary files a/recipes/icons/sage_news.png and /dev/null differ diff --git a/recipes/icons/sage_news_opinion.png b/recipes/icons/sage_news_opinion.png deleted file mode 100644 index 9f029270ff..0000000000 Binary files a/recipes/icons/sage_news_opinion.png and /dev/null differ diff --git a/recipes/icons/salonica_press_news.png b/recipes/icons/salonica_press_news.png deleted file mode 100644 index 2eef9603d3..0000000000 Binary files a/recipes/icons/salonica_press_news.png and /dev/null differ diff --git a/recipes/icons/samanyolu_haber.png b/recipes/icons/samanyolu_haber.png deleted file mode 100644 index f34e9ec67b..0000000000 Binary files a/recipes/icons/samanyolu_haber.png and /dev/null differ diff --git a/recipes/icons/samanyolu_teknoloji.png b/recipes/icons/samanyolu_teknoloji.png deleted file mode 100644 index d95525c901..0000000000 Binary files a/recipes/icons/samanyolu_teknoloji.png and /dev/null differ diff --git a/recipes/icons/sarajevo_x.png b/recipes/icons/sarajevo_x.png deleted file mode 100644 index 30f8aceacc..0000000000 Binary files a/recipes/icons/sarajevo_x.png and /dev/null differ diff --git a/recipes/icons/sardinia_post.png b/recipes/icons/sardinia_post.png deleted file mode 100644 index 92316b99dd..0000000000 Binary files a/recipes/icons/sardinia_post.png and /dev/null differ diff --git a/recipes/icons/satira.png b/recipes/icons/satira.png deleted file mode 100644 index bb7b554880..0000000000 Binary files a/recipes/icons/satira.png and /dev/null differ diff --git a/recipes/icons/sb_nation.png b/recipes/icons/sb_nation.png deleted file mode 100644 index 7e28808d10..0000000000 Binary files a/recipes/icons/sb_nation.png and /dev/null differ diff --git a/recipes/icons/schattenblick.png b/recipes/icons/schattenblick.png deleted file mode 100644 index 8228dbc530..0000000000 Binary files a/recipes/icons/schattenblick.png and /dev/null differ diff --git a/recipes/icons/schwarzerpfeil.png b/recipes/icons/schwarzerpfeil.png deleted file mode 100644 index c8a4dafae8..0000000000 Binary files a/recipes/icons/schwarzerpfeil.png and /dev/null differ diff --git a/recipes/icons/science_news_recent_issues.png b/recipes/icons/science_news_recent_issues.png deleted file mode 100644 index 42a9dcf615..0000000000 Binary files a/recipes/icons/science_news_recent_issues.png and /dev/null differ diff --git a/recipes/icons/science_x.png b/recipes/icons/science_x.png new file mode 100644 index 0000000000..c3cdc0d8e9 Binary files /dev/null and b/recipes/icons/science_x.png differ diff --git a/recipes/icons/sciencedaily.png b/recipes/icons/sciencedaily.png deleted file mode 100644 index 26f6fbf80c..0000000000 Binary files a/recipes/icons/sciencedaily.png and /dev/null differ diff --git a/recipes/icons/seanhannity.png b/recipes/icons/seanhannity.png deleted file mode 100644 index ae2f7d3aaa..0000000000 Binary files a/recipes/icons/seanhannity.png and /dev/null differ diff --git a/recipes/icons/security_watch.png b/recipes/icons/security_watch.png deleted file mode 100644 index 0239b98052..0000000000 Binary files a/recipes/icons/security_watch.png and /dev/null differ diff --git a/recipes/icons/serverside.png b/recipes/icons/serverside.png deleted file mode 100644 index 32c36260b3..0000000000 Binary files a/recipes/icons/serverside.png and /dev/null differ diff --git a/recipes/icons/sg_hu.png b/recipes/icons/sg_hu.png deleted file mode 100644 index 11abff5824..0000000000 Binary files a/recipes/icons/sg_hu.png and /dev/null differ diff --git a/recipes/icons/shacknews.png b/recipes/icons/shacknews.png deleted file mode 100644 index ca9b3b5080..0000000000 Binary files a/recipes/icons/shacknews.png and /dev/null differ diff --git a/recipes/icons/shortlist.png b/recipes/icons/shortlist.png deleted file mode 100644 index 3008199aed..0000000000 Binary files a/recipes/icons/shortlist.png and /dev/null differ diff --git a/recipes/icons/sigma_live.png b/recipes/icons/sigma_live.png deleted file mode 100644 index 163ead7ceb..0000000000 Binary files a/recipes/icons/sigma_live.png and /dev/null differ diff --git a/recipes/icons/sign_on_sd.png b/recipes/icons/sign_on_sd.png deleted file mode 100644 index 96d2d682cf..0000000000 Binary files a/recipes/icons/sign_on_sd.png and /dev/null differ diff --git a/recipes/icons/silicon_republic.png b/recipes/icons/silicon_republic.png deleted file mode 100644 index 24d94eca6f..0000000000 Binary files a/recipes/icons/silicon_republic.png and /dev/null differ diff --git a/recipes/icons/singtao_daily.png b/recipes/icons/singtao_daily.png deleted file mode 100644 index 38d59d6db6..0000000000 Binary files a/recipes/icons/singtao_daily.png and /dev/null differ diff --git a/recipes/icons/siol.png b/recipes/icons/siol.png deleted file mode 100644 index 9fea79a23b..0000000000 Binary files a/recipes/icons/siol.png and /dev/null differ diff --git a/recipes/icons/sisainlive.png b/recipes/icons/sisainlive.png deleted file mode 100644 index e0e7f2d28d..0000000000 Binary files a/recipes/icons/sisainlive.png and /dev/null differ diff --git a/recipes/icons/sizinti_derigisi.png b/recipes/icons/sizinti_derigisi.png deleted file mode 100644 index 74f1fa50a4..0000000000 Binary files a/recipes/icons/sizinti_derigisi.png and /dev/null differ diff --git a/recipes/icons/skanderborglokalavisen_dk.png b/recipes/icons/skanderborglokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/skanderborglokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/skylife.png b/recipes/icons/skylife.png deleted file mode 100644 index c441ee9c57..0000000000 Binary files a/recipes/icons/skylife.png and /dev/null differ diff --git a/recipes/icons/slate_star_codex.png b/recipes/icons/slate_star_codex.png deleted file mode 100644 index 3f5837b0bc..0000000000 Binary files a/recipes/icons/slate_star_codex.png and /dev/null differ diff --git a/recipes/icons/slovo.png b/recipes/icons/slovo.png deleted file mode 100644 index d127973523..0000000000 Binary files a/recipes/icons/slovo.png and /dev/null differ diff --git a/recipes/icons/sme.png b/recipes/icons/sme.png deleted file mode 100644 index a536b92581..0000000000 Binary files a/recipes/icons/sme.png and /dev/null differ diff --git a/recipes/icons/sn_dk.png b/recipes/icons/sn_dk.png deleted file mode 100644 index 6e425b13b2..0000000000 Binary files a/recipes/icons/sn_dk.png and /dev/null differ diff --git a/recipes/icons/snopes.png b/recipes/icons/snopes.png deleted file mode 100644 index 8447227580..0000000000 Binary files a/recipes/icons/snopes.png and /dev/null differ diff --git a/recipes/icons/socialdiva.png b/recipes/icons/socialdiva.png deleted file mode 100644 index f908a2ee62..0000000000 Binary files a/recipes/icons/socialdiva.png and /dev/null differ diff --git a/recipes/icons/soenderborglokalavisen_dk.png b/recipes/icons/soenderborglokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/soenderborglokalavisen_dk.png and /dev/null differ diff --git a/recipes/icons/soldiers.png b/recipes/icons/soldiers.png deleted file mode 100644 index a05c0c210b..0000000000 Binary files a/recipes/icons/soldiers.png and /dev/null differ diff --git a/recipes/icons/something_awful.png b/recipes/icons/something_awful.png deleted file mode 100644 index ff05e0f53c..0000000000 Binary files a/recipes/icons/something_awful.png and /dev/null differ diff --git a/recipes/icons/sondagsavisen_dk.png b/recipes/icons/sondagsavisen_dk.png deleted file mode 100644 index efa280d18f..0000000000 Binary files a/recipes/icons/sondagsavisen_dk.png and /dev/null differ diff --git a/recipes/icons/southernstar.png b/recipes/icons/southernstar.png deleted file mode 100644 index 58f2efa494..0000000000 Binary files a/recipes/icons/southernstar.png and /dev/null differ diff --git a/recipes/icons/spin_magazine.png b/recipes/icons/spin_magazine.png deleted file mode 100644 index 703456057b..0000000000 Binary files a/recipes/icons/spin_magazine.png and /dev/null differ diff --git a/recipes/icons/sportowefakty.png b/recipes/icons/sportowefakty.png deleted file mode 100644 index 292b14dd79..0000000000 Binary files a/recipes/icons/sportowefakty.png and /dev/null differ diff --git a/recipes/icons/sporza_be.png b/recipes/icons/sporza_be.png deleted file mode 100644 index 4d72dd8c1e..0000000000 Binary files a/recipes/icons/sporza_be.png and /dev/null differ diff --git a/recipes/icons/sputnik.png b/recipes/icons/sputnik.png new file mode 100644 index 0000000000..c125dcc4fc Binary files /dev/null and b/recipes/icons/sputnik.png differ diff --git a/recipes/icons/stamgasten.png b/recipes/icons/stamgasten.png deleted file mode 100644 index c89c651c9e..0000000000 Binary files a/recipes/icons/stamgasten.png and /dev/null differ diff --git a/recipes/icons/standardmoney.png b/recipes/icons/standardmoney.png deleted file mode 100644 index fb33dc07c1..0000000000 Binary files a/recipes/icons/standardmoney.png and /dev/null differ diff --git a/recipes/icons/stars_and_stripes.png b/recipes/icons/stars_and_stripes.png deleted file mode 100644 index 6b8db24297..0000000000 Binary files a/recipes/icons/stars_and_stripes.png and /dev/null differ diff --git a/recipes/icons/starwars.png b/recipes/icons/starwars.png deleted file mode 100644 index f50730e6a4..0000000000 Binary files a/recipes/icons/starwars.png and /dev/null differ diff --git a/recipes/icons/stnn.png b/recipes/icons/stnn.png deleted file mode 100644 index 0e75b1ca3a..0000000000 Binary files a/recipes/icons/stnn.png and /dev/null differ diff --git a/recipes/icons/strategic_culture.png b/recipes/icons/strategic_culture.png deleted file mode 100644 index bdc3d8c050..0000000000 Binary files a/recipes/icons/strategic_culture.png and /dev/null differ diff --git a/recipes/icons/strategy-business.png b/recipes/icons/strategy-business.png deleted file mode 100644 index cfef7ee96f..0000000000 Binary files a/recipes/icons/strategy-business.png and /dev/null differ diff --git a/recipes/icons/sueddeutsche_mobil.png b/recipes/icons/sueddeutsche_mobil.png deleted file mode 100644 index ceb8b2d301..0000000000 Binary files a/recipes/icons/sueddeutsche_mobil.png and /dev/null differ diff --git a/recipes/icons/sueddeutschezeitung.png b/recipes/icons/sueddeutschezeitung.png deleted file mode 100644 index c6fb9cd4aa..0000000000 Binary files a/recipes/icons/sueddeutschezeitung.png and /dev/null differ diff --git a/recipes/icons/superbebe.png b/recipes/icons/superbebe.png deleted file mode 100644 index 5f2a6c5a78..0000000000 Binary files a/recipes/icons/superbebe.png and /dev/null differ diff --git a/recipes/icons/superesportes.png b/recipes/icons/superesportes.png deleted file mode 100644 index 88f9f1f4bd..0000000000 Binary files a/recipes/icons/superesportes.png and /dev/null differ diff --git a/recipes/icons/svd_se.png b/recipes/icons/svd_se.png deleted file mode 100644 index 0b5f08da72..0000000000 Binary files a/recipes/icons/svd_se.png and /dev/null differ diff --git a/recipes/icons/syddjurslokalavisen_dk.png b/recipes/icons/syddjurslokalavisen_dk.png deleted file mode 100644 index 70464fce89..0000000000 Binary files a/recipes/icons/syddjurslokalavisen_dk.png and /dev/null differ diff --git a/recipes/rabble_ca.recipe b/recipes/rabble_ca.recipe deleted file mode 100644 index 7d8d9db4fe..0000000000 --- a/recipes/rabble_ca.recipe +++ /dev/null @@ -1,56 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RabbleCa(BasicNewsRecipe): - title = u'Rabble.ca' - __author__ = 'timtoo' - language = 'en_CA' - oldest_article = 7 - max_articles_per_feed = 100 - - cover_url = 'https://upload.wikimedia.org/wikipedia/en/4/44/Rabble.png' - masthead_url = 'http://rabble.ca/sites/rabble/files/dreamyrabble_logo.jpg' - - feeds = [(u'Rabble.ca', u'http://feeds.feedburner.com/rabble-news')] - - preprocess_regexps = [ - (re.compile(r'.*?to post comments', re.DOTALL | re.IGNORECASE), - lambda match: 'Tags:'), - ] - - extra_css = """ - .print-taxonomy { display: inline } - .print-taxonomy ul { display: inline; margin: 0px } - .print-taxonomy ul li { display: inline; list-style: none } - .field-type-date div { display: inline } - .field-type-link div { display: inline } - .field-type-text div { display: inline } - .field-label { font-style: italic } - """ - - def print_version(self, url): - return url.replace('http://rabble.ca/', 'http://rabble.ca/print/') - - remove_tags = [ - # print version of the web page - dict(name='div', attrs={'class': ['print-logo']}), - dict(name='div', attrs={'class': ['print-site_name']}), - dict(name='hr', attrs={'class': ['print-hr']}), - dict(name='div', attrs={'class': ['print-links']}), - - # regular web page in case you need to download them - dict(name='div', attrs={'id': ['header']}), - dict(name='div', attrs={'class': ['container-submenu']}), - dict(name='div', attrs={'id': ['sidebar']}), - dict(name='div', attrs={'id': ['footer']}), - dict(name='div', attrs={ - 'class': ['rabble-nodelinks rabble-nodelinks-top']}), - dict(name='div', attrs={ - 'class': ['rabble-nodelinks rabble-nodelinks-bottom']}), - dict(name='div', attrs={'class': ['tags-issues']}), - dict(name='div', attrs={ - 'class': ['field field-type-text field-field-summary']}), - dict(name='span', attrs={'class': ['print-footnote']}), - ] diff --git a/recipes/radikal_tr.recipe b/recipes/radikal_tr.recipe deleted file mode 100644 index 0212d591c5..0000000000 --- a/recipes/radikal_tr.recipe +++ /dev/null @@ -1,59 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010-2014, Darko Miletic ' -''' -radikal.com.tr -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Radikal_tr(BasicNewsRecipe): - title = 'Radikal - Turkey' - __author__ = 'Darko Miletic' - description = 'News from Turkey' - publisher = 'radikal' - category = 'news, politics, Turkey' - oldest_article = 2 - max_articles_per_feed = 150 - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - auto_cleanup = False - masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg' - language = 'tr' - publication_type = 'newspaper' - extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - body{font-family: 'PT Sans',Arial,Helvetica,sans1,sans-serif} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'iframe', 'embed', 'object', 'link', 'base']), - dict(name='div', attrs={ - 'class': ['options', 'news_related', 'browserWidth_shareBox']}), - dict(attrs={'class': ['breadcrumb clearfix', 'box_title']}) - ] - - keep_only_tags = [ - dict(attrs={'class': ['news-content-header', - 'news-content-text clearfix', - 'author-content-text', - 'news_detail_top', - 'news_article']}) - ] - - feeds = [ - - (u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml'), - (u'Türkiye', u'http://www.radikal.com.tr/d/rss/Rss_77.xml'), - (u'Politika', u'http://www.radikal.com.tr/d/rss/Rss_78.xml'), - (u'Dünya', u'http://www.radikal.com.tr/d/rss/Rss_81.xml'), - (u'Ekonomi', u'http://www.radikal.com.tr/d/rss/Rss_80.xml'), - (u'Radikal 2', u'http://www.radikal.com.tr/d/rss/Rss_42.xml'), - (u'Radikal Hayat', u'http://www.radikal.com.tr/d/rss/Rss_41.xml'), - (u'Radikal Kitap', u'http://www.radikal.com.tr/d/rss/Rss_40.xml'), - (u'Spor', u'http://www.radikal.com.tr/d/rss/Rss_84.xml') - ] diff --git a/recipes/radio_praha.recipe b/recipes/radio_praha.recipe deleted file mode 100644 index 0e6992f72d..0000000000 --- a/recipes/radio_praha.recipe +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1291540961(BasicNewsRecipe): - - title = u'Radio Praha' - __author__ = 'Francois Pellicaan' - description = u'Česká oficiální mezinárodní vysílací stanice.' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - encoding = 'utf8' - publisher = u'Český rozhlas' - category = 'News' - language = 'cs' - publication_type = 'newsportal' - - extra_css = u'h1 .section { display: block; text-transform: uppercase; font-size: 10px; margin-top: 4em; } \n .title { font-size: 14px; margin-top: 4em; } \n a.photo { display: block; clear:both; } \n .caption { font-size: 9px; display: block; clear:both; padding:0px 0px 20px 0px; } \n a { font-type: normal; }' # noqa - - keep_only_tags = [ - dict(name='div', attrs={'class': ['main']}) - ] - remove_tags = [ - dict(name='div', attrs={'class': ['cleaner', 'options', 'toolsXXL']}), - dict(name='ul', attrs={'class': ['tools']}) - ] - feeds = [ - (u'Domácí politika', 'http://www.radio.cz/feeds/rss/cs/oblast/dompol.xml'), - (u'Společnost', 'http://www.radio.cz/feeds/rss/cs/oblast/spolecnost.xml'), - (u'Evropská unie', 'http://www.radio.cz/feeds/rss/cs/oblast/eu.xml'), - (u'Zahraniční politika', - 'http://www.radio.cz/feeds/rss/cs/oblast/zahrpol.xml'), - (u'Ekonomika', 'http://www.radio.cz/feeds/rss/cs/oblast/ekonomika.xml'), - (u'Kultura', 'http://www.radio.cz/feeds/rss/cs/oblast/kultura.xml'), - (u'Krajané', 'http://www.radio.cz/feeds/rss/cs/oblast/krajane.xml'), - (u'Historie', 'http://www.radio.cz/feeds/rss/cs/oblast/historie.xml'), - (u'Příroda', 'http://www.radio.cz/feeds/rss/cs/oblast/priroda.xml'), - (u'Věda', 'http://www.radio.cz/feeds/rss/cs/oblast/veda.xml'), - (u'Sport', 'http://www.radio.cz/feeds/rss/cs/oblast/sport.xml'), - (u'Cestování', 'http://www.radio.cz/feeds/rss/cs/oblast/cestovani.xml'), - ] diff --git a/recipes/randerslokalavisen_dk.recipe b/recipes/randerslokalavisen_dk.recipe deleted file mode 100644 index 8d88d75e8c..0000000000 --- a/recipes/randerslokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Din avis Randers -''' - - -class RandersLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Din avis Randers' - description = 'Lokale og regionale nyheder, sport, kultur fra Randers og omegn på dinavis.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Din avis Randers', 'http://dinavis.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Din avis Randers', 'http://dinavis.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Din avis Randers', 'http://dinavis.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Din avis Randers', 'http://dinavis.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Din avis Randers', 'http://dinavis.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Din avis Randers', 'http://dinavis.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/realitatea.recipe b/recipes/realitatea.recipe deleted file mode 100644 index 7272dde488..0000000000 --- a/recipes/realitatea.recipe +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -realitatea.net -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Realitatea(BasicNewsRecipe): - title = 'Realitatea' - __author__ = u'Silviu Cotoar\u0103' - publisher = 'Realitatea' - description = u'\u0218tiri din Rom\u00e2nia' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri,Romania' - encoding = 'utf-8' - cover_url = 'http://assets.realitatea.ro/images/logo.jpg' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'articleTitle '}), dict( - name='div', attrs={'class': 'articleBody'}) - ] - - remove_tags = [dict(name='div', attrs={'id': 'aus'})] - feeds = [(u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml')] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/rebelion.recipe b/recipes/rebelion.recipe deleted file mode 100644 index 88395fb91f..0000000000 --- a/recipes/rebelion.recipe +++ /dev/null @@ -1,37 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import unicode_literals - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RebelionRecipe (BasicNewsRecipe): - # Thanks to atlantique http://www.mobileread.com/forums/member.php?u=67876 - __author__ = u'Marc Busqué ' - __url__ = 'http://www.lamarciana.com' - __version__ = '1.0' - __license__ = 'GPL v3' - __copyright__ = '2012, Marc Busqué ' - title = u'Rebelion.org' - description = u'Rebelión pretende ser un medio de información alternativa que publique las noticias que no son consideradas importantes por los medios de comunicación tradicionales. También, dar a las noticias un tratamiento diferente en la línea de mostrar los intereses que los poderes económicos y políticos del mundo capitalista ocultan para mantener sus privilegios y el status actual. Queremos servir y ayudarnos de todos los grupos, colectivos y personas que trabajan por cambiar este mundo en una perspectiva radicalmente diferente, más justa, igualitaria y equilibrada social y ecológicamente. Es nuestro objetivo contar con la participación y colaboración de todos vosotros para que Rebelión sea un espacio serio, riguroso y actualizado en la difusión de noticias.' # noqa - url = 'http://www.rebelion.org' - language = 'es' - tags = 'contrainformación, información alternativa' - oldest_article = 1 - remove_empty_feeds = True - encoding = 'latin1' - keep_only_tags = [ - {'name': 'div', 'attrs': {'id': 'CuerpoNoticia'}} - ] - no_stylesheets = True - extra_css = '.autor {font-style: italic;} .titulo {font-size: 150%;} .titulo, .pretitulo {text-align: center;} #TextoNoticia {text-align:justify;} .autor, .fuente, .entradilla {font-size: 90%; text-align: left;}' # noqa - - feeds = [ - (u'Titulares del día', u'http://www.rebelion.org/rss_portada.php'), - ] - - # See http://www.mobileread.com/forums/showthread.php?t=174501 - def print_version(self, url): - id = re.compile(r'\d*$').search(url).group() - return u'http://www.rebelion.org/noticia.php?id=%s' % id diff --git a/recipes/red_aragon.recipe b/recipes/red_aragon.recipe deleted file mode 100644 index 70f82a9f88..0000000000 --- a/recipes/red_aragon.recipe +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__copyright__ = '11 December 2010, desUBIKado' -__author__ = 'desUBIKado' -__description__ = 'Entertainment guide from Aragon' -__version__ = 'v0.01' -__date__ = '11, December 2010' -''' -[url]http://www.redaragon.es/[/url] -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class heraldo(BasicNewsRecipe): - __author__ = 'desUBIKado' - description = u'Guia de ocio desde Aragon' - title = u'RedAragon' - publisher = 'Grupo Z' - category = 'Concerts, Movies, Entertainment news' - cover_url = 'http://www.redaragon.com/2008_img/logotipo.gif' - language = 'es' - timefmt = '[%a, %d %b, %Y]' - oldest_article = 15 - max_articles_per_feed = 100 - encoding = 'iso-8859-1' - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - - feeds = [(u'Conciertos', u'http://redaragon.com/rss/agenda.asp?tid=1'), - (u'Exposiciones', u'http://redaragon.com/rss/agenda.asp?tid=5'), - (u'Teatro', u'http://redaragon.com/rss/agenda.asp?tid=10'), - (u'Conferencias', u'http://redaragon.com/rss/agenda.asp?tid=2'), - (u'Ferias', u'http://redaragon.com/rss/agenda.asp?tid=6'), - (u'Filmotecas/Cineclubs', - u'http://redaragon.com/rss/agenda.asp?tid=7'), - (u'Presentaciones', - u'http://redaragon.com/rss/agenda.asp?tid=9'), - (u'Fiestas', u'http://redaragon.com/rss/agenda.asp?tid=11'), - (u'Infantil', u'http://redaragon.com/rss/agenda.asp?tid=13'), - (u'Otros', u'http://redaragon.com/rss/agenda.asp?tid=8')] - - keep_only_tags = [dict(name='div', attrs={'id': 'FichaEventoAgenda'})] - - remove_tags = [dict(name='div', attrs={ - 'class': ['Comparte', 'CajaAgenda', 'Caja', 'Cintillo']})] - - remove_tags_before = dict(name='div', attrs={'id': 'FichaEventoAgenda'}) - - remove_tags_after = dict(name='div', attrs={'class': 'Cintillo'}) diff --git a/recipes/replicavedetelor.recipe b/recipes/replicavedetelor.recipe deleted file mode 100644 index 20e8c80220..0000000000 --- a/recipes/replicavedetelor.recipe +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, ' -''' -replicavedetelor.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ReplicaVedetelor(BasicNewsRecipe): - title = u'Replica Vedetelor' - __author__ = u'Silviu Cotoara' - description = u'Ofer\u0103 vedetelor dreptul la replic\u0103' - publisher = 'Replica Vedetelor' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Vedete' - encoding = 'utf-8' - cover_url = 'http://www.webart-software.eu/_pics/lucrari_referinta/medium/84/1-Replica-Vedetelor.jpg' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'id': 'zona-continut'}) - ] - - remove_tags = [ - dict(name='ul', attrs={'id': [ - 'lista-imagini']}), dict(name='form', attrs={'id': ['f-trimite-unui-prieten']}) - - ] - - remove_tags_after = [ - dict(name='form', attrs={'id': ['f-trimite-unui-prieten']}) - ] - - feeds = [ - (u'Feeds', u'http://www.replicavedetelor.ro/feed') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/republica.recipe b/recipes/republica.recipe deleted file mode 100644 index 64b750b74a..0000000000 --- a/recipes/republica.recipe +++ /dev/null @@ -1,21 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1316862613(BasicNewsRecipe): - title = u'Republica' - __author__ = 'Manish Bhattarai' - description = 'News from the Republica' - language = 'en_NP' - masthead_url = 'http://blog.nyayahealth.org/wp-content/uploads/2011/03/myrepublica1.gif' - oldest_article = 1 - max_articles_per_feed = 100 - auto_cleanup = True - cover_url = 'http://www.myrepublica.com/repub_front.jpg' - feeds = [ - (u'Political Affairs', u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=14'), - (u'Business & Economy', u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=15'), - (u'International', u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=21'), - - (u'Social Issues', u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=16'), - (u'Sports', u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=18'), - (u'Lifestyle', u'http://www.myrepublica.com/portal/news_rss.php?news_category_id=17')] diff --git a/recipes/republika.recipe b/recipes/republika.recipe deleted file mode 100644 index 0e4810b155..0000000000 --- a/recipes/republika.recipe +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' - -''' -republika.co.yu -''' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Republika(BasicNewsRecipe): - title = 'Republika' - __author__ = 'Darko Miletic' - description = 'Glasilo gradjanskog samooslobadjanja. Protiv stihije straha, mrznje i nasilja' - publisher = ' Zadruga Res Publica' - category = 'news, politics, Serbia' - language = 'sr' - - lang = 'sr-Latn-RS' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'cp1250' - use_embedded_content = False - INDEX = u'http://www.republika.co.yu/' - extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .naslov{font-size: x-large; font-weight: bold} .autor{font-size: small; font-weight: bold} ' # noqa - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - keep_only_tags = [dict(attrs={'class': 'naslov'}), dict(attrs={'class': 'text1'}) - ] - - remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])] - - feeds = [(u'Svi clanci', INDEX)] - - def preprocess_html(self, soup): - attribs = ['style', 'font', 'valign', 'colspan', 'width', 'height', 'rowspan', 'summary', 'align', 'cellspacing', 'cellpadding', 'frames', 'rules', 'border' ] # noqa - for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): - item.name = 'div' - for attrib in attribs: - item[attrib] = '' - del item[attrib] - return soup - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - for item in soup.findAll('a', attrs={'class': 'naslovLink'}): - url = item['href'] - title = self.tag_to_string(item) - articles.append({ - 'title': title, 'date': '', 'url': url, 'description': '' - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/reuters_ja.recipe b/recipes/reuters_ja.recipe deleted file mode 100644 index d7c895cf36..0000000000 --- a/recipes/reuters_ja.recipe +++ /dev/null @@ -1,22 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class ReutersJa(BasicNewsRecipe): - - title = 'Reuters(Japan)' - description = 'Global news in Japanese' - __author__ = 'Hiroshi Miura' - use_embedded_content = False - language = 'ja' - max_articles_per_feed = 10 - remove_javascript = True - auto_cleanup = True - - feeds = [( - 'Top Stories', 'http://feeds.reuters.com/reuters/JPTopNews?format=xml'), - ('World News', 'http://feeds.reuters.com/reuters/JPWorldNews?format=xml'), - ('Business News', 'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml'), - ('Technology News', 'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml'), - ('Oddly Enough News', - 'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml') - ] diff --git a/recipes/revista_bla.recipe b/recipes/revista_bla.recipe deleted file mode 100644 index e03b47d88e..0000000000 --- a/recipes/revista_bla.recipe +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = '2010, Gustavo Azambuja ' -''' -http://www.revistabla.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Noticias(BasicNewsRecipe): - title = 'Revista Bla' - __author__ = 'Gustavo Azambuja' - description = 'Moda | Uruguay' - language = 'es_UY' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 5 - encoding = 'utf8' - remove_javascript = True - no_stylesheets = True - - oldest_article = 20 - max_articles_per_feed = 100 - keep_only_tags = [dict(id=['body_container'])] - remove_tags = [ - dict(name='div', attrs={ - 'class': ['date_text', 'comments', 'form_section', 'share_it']}), - dict(name='div', attrs={ - 'id': ['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}), - dict(name='p', attrs={'class': 'FacebookLikeButton'}), - dict(name=['object', 'link'])] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - feeds = [ - (u'Articulos', u'http://www.revistabla.com/feed/') - ] - - def get_cover_url(self): - cover_url = None - index = 'http://www.revistabla.com' - soup = self.index_to_soup(index) - link_item = soup.find('div', attrs={'class': 'header_right'}) - if link_item: - cover_url = link_item.img['src'] - return cover_url - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/revista_cromos.recipe b/recipes/revista_cromos.recipe deleted file mode 100644 index dc8b3a4ce6..0000000000 --- a/recipes/revista_cromos.recipe +++ /dev/null @@ -1,38 +0,0 @@ -# coding=utf-8 -# https://github.com/iemejia/calibrecolombia - -''' -http://www.cromos.com.co/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ElMalpensante(BasicNewsRecipe): - title = u'Revista Cromos' - language = 'es_CO' - __author__ = 'Ismael Mejia ' - cover_url = 'http://www.cromos.com.co/sites/cromos.com.co/themes/cromos_theme/images/logo_morado.gif' - description = 'Revista Cromos' - oldest_article = 7 - simultaneous_downloads = 20 - use_embedded_content = True - remove_empty_feeds = True - max_articles_per_feed = 100 - feeds = [(u'Cromos', u'http://www.cromos.com.co/rss.xml'), - (u'Moda', u'http://www.cromos.com.co/moda/feed'), - (u'Estilo de Vida', - u'http://www.cromos.com.co/estilo-de-vida/feed'), - (u'Cuidado Personal', - u'http://www.cromos.com.co/estilo-de-vida/cuidado-personal/feed'), - (u'Salud y Alimentación', - u'http://www.cromos.com.co/estilo-de-vida/salud-y-alimentacion/feed'), - (u'Personajes', u'http://www.cromos.com.co/personajes/feed'), - (u'Actualidad', - u'http://www.cromos.com.co/personajes/actualidad/feed'), - (u'Espectáculo', - u'http://www.cromos.com.co/personajes/espectaculo/feed'), - (u'Reportajes', u'http://www.cromos.com.co/reportajes/feed'), - (u'Eventos', u'http://www.cromos.com.co/eventos/feed'), - (u'Modelos', u'http://www.cromos.com.co/modelos/feed'), - ] diff --git a/recipes/revista_piaui.recipe b/recipes/revista_piaui.recipe deleted file mode 100644 index c6014fdc9c..0000000000 --- a/recipes/revista_piaui.recipe +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- -from calibre.web.feeds.news import BasicNewsRecipe - - -class RevistaPiaui(BasicNewsRecipe): - title = u'Revista piau\xed' - language = 'pt_BR' - __author__ = u'Eduardo Gustini Simões' - oldest_article = 31 - max_articles_per_feed = 50 - auto_cleanup = True - - feeds = [(u'Edi\xe7\xe3o Atual', - u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')] - - def parse_feeds(self): - feeds = BasicNewsRecipe.parse_feeds(self) - for feed in feeds: - for article in feed.articles[:]: - soup = self.index_to_soup( - 'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml') - itemTitle = article.title.partition('|')[0].rstrip() - item = soup.find(text=itemTitle) - articleDescription = item.parent.parent.description.string.partition( - '
')[2] - article.summary = articleDescription - - return feeds - - def populate_article_metadata(self, article, soup, first): - h2 = soup.find('h2') - h2.string.replaceWith(h2.string.partition('|')[0].rstrip()) - h2.replaceWith(h2.prettify() + '

' + article.summary + '

' + - ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '

') diff --git a/recipes/revista_semana.recipe b/recipes/revista_semana.recipe deleted file mode 100644 index c36aea8dd0..0000000000 --- a/recipes/revista_semana.recipe +++ /dev/null @@ -1,11 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1317341570(BasicNewsRecipe): - title = u'Revista Semana' - __author__ = 'BIGO-CAVA' - language = 'es_CO' - oldest_article = 7 - max_articles_per_feed = 100 - - feeds = [(u'Revista Semana', u'http://www.semana.com/rss/Semana_OnLine.xml')] diff --git a/recipes/revista_summa.recipe b/recipes/revista_summa.recipe deleted file mode 100644 index 6fa8eec8ad..0000000000 --- a/recipes/revista_summa.recipe +++ /dev/null @@ -1,22 +0,0 @@ -__license__ = 'GPL v3' -__author__ = 'Vakya' -__version__ = 'v1.0' -__date__ = '14, May 2012' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1336226255(BasicNewsRecipe): - - title = u'Revista Summa' - publisher = u'Summa' - __author__ = 'Vakya' - description = 'Informacion regional sobre economia y negocios' - language = 'es' - - oldest_article = 15 - max_articles_per_feed = 100 - auto_cleanup = True - remove_tags_before = dict(name='h1') - remove_tags_after = dict(name='label') - feeds = [(u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss')] diff --git a/recipes/rga.recipe b/recipes/rga.recipe deleted file mode 100644 index 1d968edd7b..0000000000 --- a/recipes/rga.recipe +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, W. Gerard ' -''' -rga-online.de -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class rga_onliner(BasicNewsRecipe): - title = 'RGA Online - German' - __author__ = 'Werner Gerard' - description = "E-Zeitung aus RSS-Artikeln zusammengestellt." - publisher = 'RGA-Online' - category = 'Nachrichten, RGA' - oldest_article = 3 - max_articles_per_feed = 100 - language = 'de' - - lang = 'de-DE' - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - - remove_tags_before = dict(name='span', attrs={'class': 'headgross'}) - remove_tags_after = dict(name='br', attrs={'clear': 'all'}) - -# remove_tags_after = dict(name='br', attrs={'clear':'clear'}) - - feeds = [ - ('RGA-Online Remscheid', 'http://www.rga-online.de/rss/rs_news.php'), - ('RGA-Online Wermelskirchen', - 'http://www.rga-online.de/rss/wk_news.php'), - ('RGA-Online Hueckeswagen', - 'http://www.rga-online.de/rss/hk_news.php'), - ('RGA-Online Radevormwald', - 'http://www.rga-online.de/rss/rz_news.php'), - ('RGA-Online Tagesthemen', - 'http://www.rga-online.de/rss/tt_news.php'), - ('RGA-Online Brennpunkte', - 'http://www.rga-online.de/rss/br_news.php'), - ('RGA-Online Sport', - 'http://www.rga-online.de/rss/spo_news.php'), - ('RGA-Online Lokalsport', - 'http://www.rga-online.de/rss/sp_news.php'), - ('RGA-Online Bergisches Land', - 'http://www.rga-online.de/rss/bg_news.php'), - ('RGA-Online Bergische Wirtschaft', - 'http://www.rga-online.de/rss/bw_news.php') - ] - - def get_cover_url(self): - return 'http://rga.werner-gerard.de/rga.jpg' - - def postprocess_html(self, soup, first): - for tag in soup.findAll(name=['table', 'tr', 'td']): - tag.name = 'span' - return soup diff --git a/recipes/rheinische_post.recipe b/recipes/rheinische_post.recipe deleted file mode 100644 index b0f96cd105..0000000000 --- a/recipes/rheinische_post.recipe +++ /dev/null @@ -1,57 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe(BasicNewsRecipe): - - title = u'RP-online' - __author__ = 'schuster' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - language = 'de' - remove_javascript = True - masthead_url = 'http://www.die-zeitungen.de/uploads/pics/LOGO_RP_ONLINE_01.jpg' - cover_url = 'http://www.manroland.com/com/pressinfo_images/com/RheinischePost_Logo_300dpi.jpg' - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - img {min-width:300px; max-width:600px; min-height:300px; max-height:800px} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' - remove_tags_before = dict(id='article_content') - remove_tags_after = dict(id='article_content') - remove_tags = [dict(attrs={'class': ['goodies', 'left', 'right', 'clear-all', 'teaser anzeigenwerbung', 'lesermeinung', 'goodiebox', 'goodiebox 1', 'goodiebox 2', 'goodiebox 3', 'boxframe', 'link']}), # noqa - dict(id=['click_Fotos_link']), - dict(name=['script', 'noscript', 'style', '_top', 'click_Fotos_link'])] - - feeds = [ (u'Top-News', u'http://www.ngz-online.de/app/feed/rss/topnews'), - (u'Politik', u'http://www.ngz-online.de/app/feed/rss/politik'), - (u'Wirtschaft', u'http://www.ngz-online.de/app/feed/rss/wirtschaft'), - (u'Panorama', u'http://www.ngz-online.de/app/feed/rss/panorama'), - (u'Sport', u'http://www.ngz-online.de/app/feed/rss/sport'), - (u'Tour de France', u'http://www.ngz-online.de/app/feed/rss/tourdefrance'), - (u'Fußball', u'http://www.ngz-online.de/app/feed/rss/fussball'), - (u'Fußball BuLi', u'http://www.ngz-online.de/app/feed/rss/bundesliga'), - (u'Formel 1', u'http://www.ngz-online.de/app/feed/rss/formel1'), - (u'US-Sport', u'http://www.ngz-online.de/app/feed/rss/us-sports'), - (u'Boxen', u'http://www.ngz-online.de/app/feed/rss/boxen'), - (u'Eishockey', u'http://www.ngz-online.de/app/feed/rss/eishockey'), - (u'Basketball', u'http://www.ngz-online.de/app/feed/rss/basketball'), - (u'Handball', u'http://www.ngz-online.de/app/feed/rss/handball'), - (u'Motorsport', u'http://www.ngz-online.de/app/feed/rss/motorsport'), - (u'Tennis', u'http://www.ngz-online.de/app/feed/rss/tennis'), - (u'Radsport', u'http://www.ngz-online.de/app/feed/rss/radsport'), - (u'Kultur', u'http://www.ngz-online.de/app/feed/rss/kultur'), - (u'Gesellschaft', u'http://www.ngz-online.de/app/feed/rss/gesellschaft'), - (u'Wissenschaft', u'http://www.ngz-online.de/app/feed/rss/wissen'), - (u'Gesundheit', u'http://www.ngz-online.de/app/feed/rss/gesundheit'), - (u'Digitale Welt', u'http://www.ngz-online.de/app/feed/rss/digitale'), - (u'Auto & Mobil', u'http://www.ngz-online.de/app/feed/rss/auto'), - (u'Reise & Welt', u'http://www.ngz-online.de/app/feed/rss/reise'), - (u'Beruf & Karriere', u'http://www.ngz-online.de/app/feed/rss/beruf'), - (u'Herzrasen', u'http://www.ngz-online.de/app/feed/rss/herzrasen'), - (u'About a Boy', u'http://www.ngz-online.de/app/feed/rss/about_a_boy'), - - ] diff --git a/recipes/rian_spa.recipe b/recipes/rian_spa.recipe deleted file mode 100644 index 0af285d66f..0000000000 --- a/recipes/rian_spa.recipe +++ /dev/null @@ -1,33 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2009-2010, Darko Miletic ' -''' -sp.rian.ru -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Ria_esp(BasicNewsRecipe): - title = 'Ria Novosti' - __author__ = 'Darko Miletic' - description = 'Noticias desde Russia en Castellano' - language = 'es' - publisher = 'sp.rian.ru' - category = 'news, politics, Russia' - oldest_article = 3 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [dict( - name='div', attrs={'class': ['mainnewsrubric', 'titleblock', 'mainnewstxt']})] - remove_tags = [dict(name=['object', 'link', 'iframe', 'base'])] - - feeds = [ - (u'Noticias', u'http://rss.feedsportal.com/c/860/fe.ed/sp.rian.ru/export/rss2/index.xml')] diff --git a/recipes/roger_ebert.recipe b/recipes/roger_ebert.recipe deleted file mode 100644 index 689a631eb8..0000000000 --- a/recipes/roger_ebert.recipe +++ /dev/null @@ -1,108 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Ebert(BasicNewsRecipe): - title = 'Roger Ebert' - __author__ = 'Shane Erstad' - description = 'Roger Ebert Movie Reviews' - publisher = 'Chicago Sun Times' - category = 'movies' - oldest_article = 8 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - masthead_url = 'http://rogerebert.suntimes.com/graphics/global/roger.jpg' - language = 'en' - remove_empty_feeds = False - PREFIX = 'http://rogerebert.suntimes.com' - patternReviews = r'(.*?).*?
(.*?)
(.*?)' - patternCommentary = r'
.*?(.*?).*?
(.*?)
' # noqa - patternPeople = r'
.*?(.*?).*?
(.*?)
' # noqa - patternGlossary = r'
.*?(.*?).*?
(.*?)
' # noqa - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - feeds = [ - - (u'Reviews', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=reviews'), - (u'Commentary', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=COMMENTARY'), - (u'Great Movies', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=REVIEWS08'), - (u'People', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=PEOPLE'), - (u'Glossary', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=GLOSSARY') - - ] - - preprocess_regexps = [ - (re.compile(r'.*?This is a printer friendly.*?.*?
', re.DOTALL | re.IGNORECASE), - lambda m: '') - ] - - def print_version(self, url): - return url + '&template=printart' - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.log('\tFeedurl: ', feedurl) - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - page = self.index_to_soup(feedurl, raw=True) - - if feedtitle == 'Reviews' or feedtitle == 'Great Movies': - pattern = self.patternReviews - elif feedtitle == 'Commentary': - pattern = self.patternCommentary - elif feedtitle == 'People': - pattern = self.patternPeople - elif feedtitle == 'Glossary': - pattern = self.patternGlossary - - regex = re.compile(pattern, re.IGNORECASE | re.DOTALL) - - for match in regex.finditer(page): - if feedtitle == 'Reviews' or feedtitle == 'Great Movies': - movietitle = match.group(1) - thislink = match.group(2) - description = match.group(3) - elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary': - thislink = match.group(1) - description = match.group(2) - - self.log(thislink) - soup = self.index_to_soup(thislink) - for link in soup.findAll('a', href=True): - thisurl = self.PREFIX + link['href'] - thislinktext = self.tag_to_string(link) - - if feedtitle == 'Reviews' or feedtitle == 'Great Movies': - thistitle = movietitle - elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary': - thistitle = thislinktext - - if thistitle == '': - thistitle = 'Ebert Journal Post' - - r""" - pattern2 = r'AID=\/(.*?)\/' - reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL) - match2 = reg2.search(thisurl) - date = match2.group(1) - c = time.strptime(match2.group(1),"%Y%m%d") - date=time.strftime("%a, %b %d, %Y", c) - self.log(date) - """ - - articles.append({ - 'title': thistitle, 'date': '', 'url': thisurl, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - - return totalfeeds diff --git a/recipes/roger_ebert_blog.recipe b/recipes/roger_ebert_blog.recipe deleted file mode 100644 index fc7aa2230f..0000000000 --- a/recipes/roger_ebert_blog.recipe +++ /dev/null @@ -1,125 +0,0 @@ -import re -import time - -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - -''' - Help Needed: - Still can't figure out why I'm getting strange characters. Esp. the Great Movies descriptions in the TOC. - Anyone help me figure that out? - - Change Log: - 2011-02-19: Version 2: Added "Oscars" section and fixed date problem -''' - - -class Ebert(BasicNewsRecipe): - title = 'Roger Ebert' - __author__ = 'Shane Erstad' - version = 2 - description = 'Roger Ebert Movie Reviews' - publisher = 'Chicago Sun Times' - category = 'movies' - oldest_article = 8 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'UTF-8' - masthead_url = 'http://rogerebert.suntimes.com/graphics/global/roger.jpg' - language = 'en' - remove_empty_feeds = False - PREFIX = 'http://rogerebert.suntimes.com' - patternReviews = r'(.*?).*?
(.*?)
(.*?)
' - patternCommentary = r'
.*?(.*?).*?
(.*?)
' # noqa - patternPeople = r'
.*?(.*?).*?
(.*?)
' # noqa - patternOscars = r'
.*?(.*?).*?
(.*?)
' # noqa - patternGlossary = r'
.*?(.*?).*?
(.*?)
' # noqa - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - feeds = [ - - (u'Reviews', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=reviews'), - (u'Commentary', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=COMMENTARY'), - (u'Great Movies', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=REVIEWS08'), - (u'People', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=PEOPLE'), - (u'Oscars', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=OSCARS'), - (u'Glossary', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=GLOSSARY') - - ] - - preprocess_regexps = [ - (re.compile(r'.*?This is a printer friendly.*?.*?
', re.DOTALL | re.IGNORECASE), - lambda m: '') - ] - - def print_version(self, url): - return url + '&template=printart' - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.log('\tFeedurl: ', feedurl) - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - page = self.index_to_soup(feedurl, raw=True) - - if feedtitle == 'Reviews' or feedtitle == 'Great Movies': - pattern = self.patternReviews - elif feedtitle == 'Commentary': - pattern = self.patternCommentary - elif feedtitle == 'People': - pattern = self.patternPeople - elif feedtitle == 'Glossary': - pattern = self.patternGlossary - elif feedtitle == 'Oscars': - pattern = self.patternOscars - - regex = re.compile(pattern, re.IGNORECASE | re.DOTALL) - - for match in regex.finditer(page): - if feedtitle == 'Reviews' or feedtitle == 'Great Movies': - movietitle = match.group(1) - thislink = match.group(2) - description = match.group(3) - elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary' or feedtitle == 'Oscars': - thislink = match.group(1) - description = match.group(2) - - self.log(thislink) - soup = self.index_to_soup(thislink) - - for link in soup.findAll('a', href=True): - thisurl = self.PREFIX + link['href'] - thislinktext = self.tag_to_string(link) - - if feedtitle == 'Reviews' or feedtitle == 'Great Movies': - thistitle = movietitle - elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary' or feedtitle == 'Oscars': - thistitle = thislinktext - - if thistitle == '': - continue - - pattern2 = r'AID=\/(.*?)\/' - reg2 = re.compile(pattern2, re.IGNORECASE | re.DOTALL) - match2 = reg2.search(thisurl) - if match2: - c = time.strptime(match2.group(1), "%Y%m%d") - mydate = strftime("%A, %B %d, %Y", c) - else: - mydate = strftime("%A, %B %d, %Y") - self.log(mydate) - - articles.append({ - 'title': thistitle, 'date': ' [' + mydate + ']', 'url': thisurl, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - - return totalfeeds diff --git a/recipes/rollingstone.recipe b/recipes/rollingstone.recipe deleted file mode 100644 index 8a5e1d7593..0000000000 --- a/recipes/rollingstone.recipe +++ /dev/null @@ -1,28 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -rollingstone.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RollingStone(BasicNewsRecipe): - title = 'Rolling Stone Magazine - free content' - __author__ = 'Darko Miletic' - description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.' # noqa - publisher = 'Werner Media inc.' - category = 'news, music, USA, world' - oldest_article = 15 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - remove_empty_feeds = True - publication_type = 'magazine' - auto_cleanup = True - - feeds = [ - (u'All News', u'http://www.rollingstone.com/siteServices/rss/allNews'), - ] diff --git a/recipes/romanialibera.recipe b/recipes/romanialibera.recipe deleted file mode 100644 index 667013bf6f..0000000000 --- a/recipes/romanialibera.recipe +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -romanialibera.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RomaniaLibera(BasicNewsRecipe): - title = u'Rom\u00e2nia Liber\u0103' - __author__ = u'Silviu Cotoar\u0103' - description = u'Rom\u00e2nia Liber\u0103' - publisher = u'Rom\u00e2nia Liber\u0103' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri' - encoding = 'utf-8' - cover_url = 'http://www.romanialibera.ro/templates/lilac/images/sigla_1.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'id': 'articol'}) - ] - - remove_tags = [ - dict(name='div', attrs={'id': ['art_actions']}), dict(name='div', attrs={'class': ['stats']}), dict(name='div', attrs={'class': ['data']}), dict(name='div', attrs={'class': ['autori']}), dict(name='div', attrs={'class': ['banda_explicatii_text']}), dict(name='td', attrs={'class': ['connect_widget_vertical_center connect_widget_button_cell']}), dict(name='div', attrs={'class': ['aceeasi_tema']}), dict(name='div', attrs={'class': ['art_after_text']}), dict(name='div', attrs={'class': ['navigare']}), dict(name='div', attrs={'id': ['art_text_left']}) # noqa - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': 'art_after_text'}) - ] - - feeds = [ - (u'Feeds', u'http://www.romanialibera.ro/rss.xml') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/roskildelokalavisen_dk.recipe b/recipes/roskildelokalavisen_dk.recipe deleted file mode 100644 index 64e32346d1..0000000000 --- a/recipes/roskildelokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Roskilde Avis -''' - - -class RoskildeLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Roskilde Avis' - description = 'Lokale og regionale nyheder, sport og kultur fra Roskilde og omegn på roskilde.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Roskilde Avis', 'http://roskilde.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Roskilde Avis', 'http://roskilde.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Roskilde Avis', 'http://roskilde.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Roskilde Avis', 'http://roskilde.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Roskilde Avis', 'http://roskilde.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Roskilde Avis', 'http://roskilde.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/rubikon_de.recipe b/recipes/rubikon_de.recipe deleted file mode 100644 index 3dce614ff9..0000000000 --- a/recipes/rubikon_de.recipe +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1543143461(BasicNewsRecipe): - title = 'Rubikon.de' - description = 'Nachrichten anders/neutral beleuchtet' - __author__ = 'schuster' - __license__ = 'GPL v3' - version = 1 - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - no_stylesheets = True - use_embedded_content = False - language = 'de' - remove_javascript = True - timefmt = ' [%d.%m.%Y]' - masthead_url = 'https://www.rubikon.news/assets/logo-dd0fcd373a0c872bb432f7596d9e700155c5d7fa07ec99a3777d44621e8c61fe.svg' - - remove_tags = [ - dict(id=['download-pdf']), - dict(id=['read-article']), - dict(name='div', attrs={ - 'id': ['print_options', 'print_head']}), - dict(name='div', attrs={'class': ['article-meta']}), - dict(name='div', attrs={'class': ['article-end']}), - dict(name='span', attrs={'class': ['lens']})] - - feeds = [ - ('Alle Artikel', 'https://www.rubikon.news/artikel.atom'), - ] diff --git a/recipes/rudersdallokalavisen_dk.recipe b/recipes/rudersdallokalavisen_dk.recipe deleted file mode 100644 index 0ac78fbd2d..0000000000 --- a/recipes/rudersdallokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Rudersdal Avis -''' - - -class RudersdalLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Rudersdal Avis' - description = 'Lokale, regionale nyheder, sport, kultur fra Rudersdal, Birkerød, Holte, Nærum, Vedbæk, Søllerød på rudersdal.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Rudersdal Avis', 'http://rudersdal.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Rudersdal Avis', 'http://rudersdal.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Rudersdal Avis', 'http://rudersdal.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Rudersdal Avis', 'http://rudersdal.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Rudersdal Avis', 'http://rudersdal.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Rudersdal Avis', 'http://rudersdal.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/rue89.recipe b/recipes/rue89.recipe deleted file mode 100644 index 4ef4166790..0000000000 --- a/recipes/rue89.recipe +++ /dev/null @@ -1,80 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010-2012, Louis Gesbert ' -''' -Rue89 -''' - -__author__ = '2010-2012, Louis Gesbert ' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Rue89(BasicNewsRecipe): - title = 'Rue89' - __author__ = 'Louis Gesbert' - description = 'Popular free french news website' - title = u'Rue89' - language = 'fr' - oldest_article = 7 - max_articles_per_feed = 50 - - use_embedded_content = False - - # From http://www.rue89.com/les-flux-rss-de-rue89 - feeds = [ - (u'La Une', u'http://www.rue89.com/feed'), - # Other feeds disabled, 'La Une' seems to include them all - # (u'Rue69', u'http://www.rue89.com/rue69/feed'), - # (u'Eco', u'http://www.rue89.com/rue89-eco/feed'), - # (u'Planète', u'http://www.rue89.com/rue89-planete/feed'), - # (u'Sport', u'http://www.rue89.com/rue89-sport/feed'), - # (u'Culture', u'http://www.rue89.com/culture/feed'), - # (u'Hi-tech', u'http://www.rue89.com/hi-tech/feed'), - # (u'Media', u'http://www.rue89.com/medias/feed'), - # (u'Monde', u'http://www.rue89.com/monde/feed'), - # (u'Politique', u'http://www.rue89.com/politique/feed'), - # (u'Societe', u'http://www.rue89.com/societe/feed'), - ] - - # Follow redirection from feedsportal.com - def get_article_url(self, article): - return self.browser.open_novisit(article.link).geturl() - - def print_version(self, url): - return url + '?imprimer=1' - - conversion_options = {'smarten_punctuation': True} - - keep_only_tags = [ - dict(name='div', attrs={'id': 'content'}), - ] - - remove_tags_after = [ - dict(name='div', attrs={'id': 'plus_loin'}), - dict(name='div', attrs={'class': 'stats'}), - ] - - remove_tags = [ - dict(name='div', attrs={'id': 'article_tools'}), - dict(name='div', attrs={'id': 'plus_loin'}), - dict(name='div', attrs={'class': 'stats'}), - dict(name='div', attrs={'class': 'tools'}), - ] - - extra_css = "#content { padding: 0 0; }" - - # Without this, parsing of video articles returns strange results - preprocess_regexps = [ - (re.compile(r'', re.IGNORECASE | re.DOTALL), ''), - ] - - def preprocess_html(self, soup): - # Remove whole article if it's a "zapnet" (video) - if soup.find('h1', {'class': 'zapnet_title'}): - return None - # Reduce h2 titles to h3 - for title in soup.findAll('h2'): - title.name = 'h3' - return soup diff --git a/recipes/rusiahoy.recipe b/recipes/rusiahoy.recipe deleted file mode 100644 index 113afc5308..0000000000 --- a/recipes/rusiahoy.recipe +++ /dev/null @@ -1,43 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -rusiahoy.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RusiaHoy(BasicNewsRecipe): - title = 'Rusia Hoy' - __author__ = 'Darko Miletic' - description = 'Noticias de Russia en castellano' - publisher = 'rusiahoy.com' - category = 'news, politics, Russia' - oldest_article = 7 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'es' - remove_empty_feeds = True - extra_css = """ - body{font-family: Arial,sans-serif } - .article_article_title{font-size: xx-large; font-weight: bold} - .article_date{color: black; font-size: small} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link', 'iframe', 'base', 'object', 'embed'])] - keep_only_tags = [dict(attrs={'class': ['article_rubric_title', 'article_date', 'article_article_title', 'article_article_lead']}), dict(attrs={'class': 'article_article_text'}) ] # noqa - remove_attributes = ['align', 'width', 'height'] - - feeds = [(u'Articulos', u'http://rusiahoy.com/xml/index.xml')] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/rynek_infrastruktury.recipe b/recipes/rynek_infrastruktury.recipe deleted file mode 100644 index 6c981640b6..0000000000 --- a/recipes/rynek_infrastruktury.recipe +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__author__ = 'teepel ' - -''' -http://www.rynekinfrastruktury.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class prawica_recipe(BasicNewsRecipe): - title = u'Rynek Infrastruktury' - __author__ = 'teepel ' - language = 'pl' - description = u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.' # noqa - remove_empty_feeds = True - oldest_article = 1 - max_articles_per_feed = 50 - remove_javascript = True - no_stylesheets = True - - feeds = [ - (u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/drogi.xml'), - (u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/kolej.xml'), - (u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/energetyka.xml') - # no news in these feeds since 4 years: - # (u'Porty i lotniska', u'http://www.rynekinfrastruktury.pl/rss/porty-i-lotniska.xml'), - # (u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/komentarze-i-felietony.xml'), - ] - - keep_only_tags = [ - dict(name='h1', attrs={'class': 'wiadTit'}), - dict(name='div', attrs={'class': ['wiadSzczegol', 'multimediaWiadomosci', 'wiadTresc']}) - ] - - remove_tags = [dict(name='span', attrs={'class': 'kom'})] diff --git a/recipes/rynek_zdrowia.recipe b/recipes/rynek_zdrowia.recipe deleted file mode 100644 index fff9041c6e..0000000000 --- a/recipes/rynek_zdrowia.recipe +++ /dev/null @@ -1,35 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class rynekzdrowia(BasicNewsRecipe): - title = u'Rynek Zdrowia' - __author__ = u'spi630' - language = 'pl' - masthead_url = 'http://k.rynekzdrowia.pl/images/headerLogo.png' - cover_url = 'http://k.rynekzdrowia.pl/images/headerLogo.png' - oldest_article = 3 - max_articles_per_feed = 25 - no_stylesheets = True - auto_cleanup = True - remove_empty_feeds = True - - remove_tags_before = dict(name='h3') - - feeds = [ - (u'Finanse i Zarz\u0105dzanie', u'http://www.rynekzdrowia.pl/Kanal/finanse.html'), - (u'Inwestycje', u'http://www.rynekzdrowia.pl/Kanal/inwestycje.html'), - (u'Aparatura i wyposa\u017cenie', u'http://www.rynekzdrowia.pl/Kanal/aparatura.html'), - (u'Informatyka', u'http://www.rynekzdrowia.pl/Kanal/informatyka.html'), - (u'Prawo', u'http://www.rynekzdrowia.pl/Kanal/prawo.html'), - (u'Polityka zdrowotna', u'http://www.rynekzdrowia.pl/Kanal/polityka_zdrowotna.html'), - - (u'Ubezpieczenia Zdrowotne', u'http://www.rynekzdrowia.pl/Kanal/ubezpieczenia.html'), - (u'Farmacja', u'http://www.rynekzdrowia.pl/Kanal/farmacja.html'), - (u'Badania i rozw\xf3j', u'http://www.rynekzdrowia.pl/Kanal/badania.html'), - (u'Nauka', u'http://www.rynekzdrowia.pl/Kanal/nauka.html'), - (u'Po godzinach', u'http://www.rynekzdrowia.pl/Kanal/godziny.html'), - (u'Us\u0142ugi medyczne', u'http://www.rynekzdrowia.pl/Kanal/uslugi.html')] - - def print_version(self, url): - url = url.replace('.html', ',drukuj.html') - return url diff --git a/recipes/sa_gazeta.recipe b/recipes/sa_gazeta.recipe deleted file mode 100644 index aa465b530a..0000000000 --- a/recipes/sa_gazeta.recipe +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import AutomaticNewsRecipe - - -class BasicUserRecipe1501589847(AutomaticNewsRecipe): - title = 'Sa gazeta' - oldest_article = 30 - max_articles_per_feed = 100 - auto_cleanup = True - language = 'sc' - __author__ = 'tzium' - - feeds = [ - ('Sa gazeta', 'http://www.sagazeta.info/feeds/posts/default'), - ] diff --git a/recipes/sabit_fikir.recipe b/recipes/sabit_fikir.recipe deleted file mode 100644 index a42b6ed393..0000000000 --- a/recipes/sabit_fikir.recipe +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class BasicUserRecipe1325259641(BasicNewsRecipe): - language = 'tr' - __author__ = 'asalet_r' - title = u'Sabit Fikir' - oldest_article = 7 - max_articles_per_feed = 20 - auto_cleanup = True - - feeds = [(u'Sabit Fikir', u'http://www.sabitfikir.com/rss.xml')] diff --git a/recipes/sage_news.recipe b/recipes/sage_news.recipe deleted file mode 100644 index b65ca524c0..0000000000 --- a/recipes/sage_news.recipe +++ /dev/null @@ -1,33 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1292550626(BasicNewsRecipe): - title = 'The Sage News - Satire' - __author__ = 'Brian Hahn' - description = 'News without boundaries, Satire' - oldest_article = 200 - max_articles_per_feed = 150 - no_stylesheets = True - use_embedded_content = False - publisher = 'The Sage News Network' - category = 'News, Alberta, Canada' - language = 'en_CA' - encoding = 'iso-8859-1' - cover_url = 'http://www.sagenews.ca/images/satire-cover.jpg' - remove_tags_before = dict(id='ContentPanel') - remove_tags_after = dict(id='ContentPanel') - remove_tags = [dict(name='div', attrs={'id': 'BottomAds'}), dict(name='div', attrs={ - 'id': 'moreStories'}), dict(name='div', attrs={'id': 'StoryNavigation'})] - extra_css = 'img { margin:5px }' - feeds = [ - ('Satire World', 'http://www.sagenews.ca/Satire-World.rss'), - ('Satire Politics', 'http://www.sagenews.ca/Satire-Politics.rss'), - ('Satire Justice', 'http://www.sagenews.ca/Satire-Justice.rss'), - ('Satire Health', 'http://www.sagenews.ca/Satire-Health.rss'), - ('Satire Environment', 'http://www.sagenews.ca/Satire-Environment.rss'), - ('Satire Living', 'http://www.sagenews.ca/Satire-Living.rss'), - ('Satire Sports', 'http://www.sagenews.ca/Satire-Sports.rss'), - ('Satire Business', 'http://www.sagenews.ca/Satire-Business.rss'), - ('Satire Agriculture', 'http://www.sagenews.ca/Satire-Agriculture.rss'), - ('Satire Humour', 'http://www.sagenews.ca/Satire-Humour.rss'), - ] diff --git a/recipes/sage_news_opinion.recipe b/recipes/sage_news_opinion.recipe deleted file mode 100644 index 141837b12a..0000000000 --- a/recipes/sage_news_opinion.recipe +++ /dev/null @@ -1,37 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1292550626(BasicNewsRecipe): - title = 'The Sage News - Opinion' - __author__ = 'Brian Hahn' - description = 'News without boundaries, Opinion' - oldest_article = 200 - max_articles_per_feed = 150 - no_stylesheets = True - use_embedded_content = False - publisher = 'The Sage News Network' - category = 'News, Alberta, Canada' - language = 'en_CA' - encoding = 'iso-8859-1' - cover_url = 'http://www.sagenews.ca/images/opinion-cover.jpg' - remove_tags_before = dict(id='ContentPanel') - remove_tags_after = dict(id='ContentPanel') - remove_tags = [dict(name='div', attrs={'id': 'BottomAds'}), dict(name='div', attrs={ - 'id': 'moreStories'}), dict(name='div', attrs={'id': 'StoryNavigation'})] - extra_css = 'img { margin:5px }' - feeds = [ - ('Editorial Comment', 'http://www.sagenews.ca/Editorial%20Comment.rss'), - ('Grumpy Old Man', 'http://www.sagenews.ca/Grumpy%20Old%20Man.rss'), - ('Bad Girl', 'http://www.sagenews.ca/Bad%20Girl.rss'), - ('Around the Edges with Dixie', - 'http://www.sagenews.ca/Around%20the%20Edges%20with%20Dixie.rss'), - ('Man Vs. World', 'http://www.sagenews.ca/Man%20Vs.%20World.rss'), - ('Opinion World', 'http://www.sagenews.ca/Opinion-World.rss'), - ('Opinion Politics', 'http://www.sagenews.ca/Opinion-Politics.rss'), - ('Opinion Justice', 'http://www.sagenews.ca/Opinion-Justice.rss'), - ('Opinion Health', 'http://www.sagenews.ca/Opinion-Health.rss'), - ('Opinion Environment', 'http://www.sagenews.ca/Opinion-Environment.rss'), - ('Opinion Living', 'http://www.sagenews.ca/Opinion-Living.rss'), - ('Opinion Sports', 'http://www.sagenews.ca/Opinion-Sports.rss'), - ('Opinion Business', 'http://www.sagenews.ca/Opinion-Business.rss'), - ] diff --git a/recipes/salonica_press_news.recipe b/recipes/salonica_press_news.recipe deleted file mode 100644 index 30864d9249..0000000000 --- a/recipes/salonica_press_news.recipe +++ /dev/null @@ -1,35 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class spn(BasicNewsRecipe): - title = u'Salonica Press News' - language = 'gr' - __author__ = "SteliosGero" - oldest_article = 3 - max_articles_per_feed = 100 - auto_cleanup = True - category = 'news, GR' - language = 'el' - - feeds = [ - (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&type=rss'), - (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&type=rss'), - (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&type=rss'), - (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&type=rss'), - (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&type=rss'), - (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&type=rss'), - (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&type=rss'), - (u'Media', u'http://www.spnews.gr/media-news?format=feed&type=rss'), - (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&type=rss'), - - (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&type=rss'), - (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&type=rss'), - (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&type=rss'), - (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&type=rss'), - (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&type=rss'), - (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&type=rss'), - (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&type=rss'), - (u'Alert!', u'http://spnews.gr/alert?format=feed&type=rss')] - - def print_version(self, url): - return url + '?tmpl=component&print=1&layout=default&page=' diff --git a/recipes/samanyolu_haber.recipe b/recipes/samanyolu_haber.recipe deleted file mode 100644 index 14143caac8..0000000000 --- a/recipes/samanyolu_haber.recipe +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SHaber (BasicNewsRecipe): - - title = u'Samanyolu Haber' - __author__ = u'thomass' - description = ' Samanyolu Haber Sitesinden günlük haberler ' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - auto_cleanup = True - encoding = 'utf-8' - publisher = 'thomass' - category = 'güncel, haber, türkçe' - language = 'tr' - publication_type = 'newspaper' - - conversion_options = { - 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - extra_css = ' .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px} .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px } #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }.KirmiziText{ font-weight: normal;font-size: 5px }' # noqa - - cover_img_url = 'http://www.samanyoluhaber.com/include/logo.png' - masthead_url = 'http://www.samanyoluhaber.com/include/logo.png' - remove_empty_feeds = True - - feeds = [ - (u'Son Dakika', u'http://podcast.samanyoluhaber.com/sondakika.rss'), - (u'Gündem', u'http://podcast.samanyoluhaber.com/gundem.rss'), - (u'Politika ', u'http://podcast.samanyoluhaber.com/politika.rss'), - (u'Ekonomi', u'http://podcast.samanyoluhaber.com/ekonomi.rss'), - (u'Dünya', u'http://podcast.samanyoluhaber.com/dunya.rss'), - (u'Spor ', u'http://podcast.samanyoluhaber.com/spor.rss'), - (u'Sağlık', u'http://podcast.samanyoluhaber.com/saglik.rss'), - (u'Kültür', u'http://podcast.samanyoluhaber.com/kultur.rss'), - (u'Eğitim', u'http://podcast.samanyoluhaber.com/egitim.rss'), - (u'Ramazan', u'http://podcast.samanyoluhaber.com/ramazan.rss'), - (u'Yazarlar ', u'http://podcast.samanyoluhaber.com/yazarlar.rss'), - - - - ] - - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup - diff --git a/recipes/samanyolu_teknoloji.recipe b/recipes/samanyolu_teknoloji.recipe deleted file mode 100644 index d05dfaf9a6..0000000000 --- a/recipes/samanyolu_teknoloji.recipe +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SHaberTekno (BasicNewsRecipe): - - title = u'Samanyolu Teknoloji' - __author__ = u'thomass' - description = 'Samanyolu Teknoloji Haber Sitesinden haberler ' - oldest_article = 8 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - publisher = 'thomass' - category = 'bilim, teknoloji, haber, türkçe' - language = 'tr' - publication_type = 'magazine' - - conversion_options = { - 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - extra_css = ' .IcerikMetin{ font-family:sans-serif;font-weight: normal;font-size: 10px } .h1IcerikBaslik {font-weight: bold; font-size: 18px}' - - keep_only_tags = [ - dict(name='div', attrs={'class': ['IcerikBaslik', 'IcerikMetinDiv']})] - - cover_img_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg' - masthead_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg' - remove_empty_feeds = True - - feeds = [ - (u'GENEL', u'http://podcast.samanyoluhaber.com/Teknoloji.rss'), - (u'İNTERNET', u'http://open.dapper.net/services/shaberteknolojiinternet'), - (u'CEP TELEFONU', - u'http://open.dapper.net/services/shaberteknolojicep'), - (u'OYUN', u'http://open.dapper.net/services/shaberteknolojioyun'), - (u'DONANIM', u'http://open.dapper.net/services/httpopendappernetservicesshaberteknolojidonanim'), - (u'ÜRÜN İNCELEME', - u'http://open.dapper.net/services/shaberteknolojiurun'), - (u'ALIŞVERİŞ', u'http://open.dapper.net/services/shaberteknolojialisveris'), - (u'BİLİM & TEKNOLOJİ', - u'http://open.dapper.net/services/shaberteknolojibilim'), - (u'HABERLER', u'http://open.dapper.net/services/shaberteknolojihaber'), - - - - ] - diff --git a/recipes/sarajevo_x.recipe b/recipes/sarajevo_x.recipe deleted file mode 100644 index c5a97b369c..0000000000 --- a/recipes/sarajevo_x.recipe +++ /dev/null @@ -1,73 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' - -''' -sarajevo-x.com -''' - -import re - -from calibre.ebooks.BeautifulSoup import NavigableString, Tag -from calibre.web.feeds.recipes import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class SarajevoX(BasicNewsRecipe): - title = 'Sarajevo-x.com' - __author__ = 'Darko Miletic' - description = 'Sarajevo-x.com - najposjeceniji bosanskohercegovacki internet portal' - publisher = 'InterSoft d.o.o.' - category = 'news, politics, Bosnia and Herzegovina,Sarajevo-x.com, internet, portal, vijesti, bosna i hercegovina, sarajevo' - oldest_article = 2 - delay = 1 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'cp1250' - use_embedded_content = False - language = 'bs' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} div#fotka{display: block} img{margin-bottom: 0.5em} ' # noqa - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - - keep_only_tags = [dict(name='div', attrs={'class': 'content-bg'})] - remove_tags_after = dict(name='div', attrs={'class': 'izvor'}) - remove_tags = [dict(name=['object', 'link', 'base', 'table'])] - remove_attributes = ['height', 'width', 'alt', 'border'] - - feeds = [ - - (u'BIH', u'http://www.sarajevo-x.com/rss/bih'), - (u'Svijet', u'http://www.sarajevo-x.com/rss/svijet'), - (u'Biznis', u'http://www.sarajevo-x.com/rss/biznis'), - (u'Sport', u'http://www.sarajevo-x.com/rss/sport'), - (u'Showtime', u'http://www.sarajevo-x.com/rss/showtime'), - (u'Scitech', u'http://www.sarajevo-x.com/rss/scitech'), - (u'Lifestyle', u'http://www.sarajevo-x.com/rss/lifestyle'), - (u'Kultura', u'http://www.sarajevo-x.com/rss/kultura'), - (u'Zanimljivosti', u'http://www.sarajevo-x.com/rss/zanimljivosti') - ] - - def preprocess_html(self, soup): - dtag = soup.find('div', attrs={'id': 'fotka'}) - if dtag: - sp = soup.find('div', attrs={'id': 'opisslike'}) - img = soup.find('img') - if sp: - sp - else: - mtag = new_tag(soup, 'div', [ - ("id", "opisslike"), ("class", "opscitech")]) - mopis = NavigableString("Opis") - mtag.insert(0, mopis) - img.append(mtag) - return soup diff --git a/recipes/sardinia_post.recipe b/recipes/sardinia_post.recipe deleted file mode 100644 index 3a4b902f6c..0000000000 --- a/recipes/sardinia_post.recipe +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import AutomaticNewsRecipe - - -class BasicUserRecipe1501589429(AutomaticNewsRecipe): - title = 'Sardinia Post (Lapis)' - oldest_article = 30 - max_articles_per_feed = 100 - auto_cleanup = True - language = 'sc' - __author__ = 'tzium' - - feeds = [ - ('Sardinia Post (Lapis)', - 'https://www.sardiniapost.it/category/lapis/feed'), - ] diff --git a/recipes/satira.recipe b/recipes/satira.recipe deleted file mode 100644 index 6d130853e7..0000000000 --- a/recipes/satira.recipe +++ /dev/null @@ -1,18 +0,0 @@ -__license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1327351409(BasicNewsRecipe): - title = u'Satira' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - feeds = [ - (u'spinoza', u'http://feeds.feedburner.com/Spinoza'), - (u'umore maligno', u'http://www.umoremaligno.it/feed/rss/'), - (u'fed-ex', u'http://exfed.tumblr.com/rss'), - (u'metilparaben', u'http://feeds.feedburner.com/metil'), - (u'freddy nietzsche', u'http://feeds.feedburner.com/FreddyNietzsche')] - __author__ = 'faber1971' - description = 'Collection of Italian satiric blogs - v1.00 (28, January 2012)' - language = 'it' diff --git a/recipes/sb_nation.recipe b/recipes/sb_nation.recipe deleted file mode 100644 index 00b33ecefa..0000000000 --- a/recipes/sb_nation.recipe +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = 'Zotzo' -''' -http://www.stumptownfooty.com/ -http://www.eightysixforever.com -http://www.sounderatheart.com -http://www.dailysoccerfix.com/ - -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class SBNation(BasicNewsRecipe): - title = u'SBNation' - __author__ = 'rylsfan' - description = u"More than 290 individual communities, each offering high quality year-round coverage and conversation led by fans who are passionate." - oldest_article = 3 - language = 'en' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - - keep_only_tags = [ - dict(name='h2', attrs={'class': 'title'}), dict( - name='div', attrs={'class': 'entry-body'}) - ] - - remove_tags_after = dict( - name='div', attrs={'class': 'footline entry-actions'}) - remove_tags = [ - dict(name='div', attrs={'class': 'footline entry-actions'}), - {'class': 'extend-divide'} - ] - # SBNation has 300 special blogs to choose from. These are just a couple! - feeds = [ - (u'Daily Fix', u'http://www.dailysoccerfix.com/rss/'), - (u"Stumptown Footy", u'http://www.stumptownfooty.com/rss/'), - (u'Sounders', u'http://www.sounderatheart.com/rss/'), - (u'Whitecaps', u'http://www.eightysixforever.com/rss/'), - ] - - extra_css = """ - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - p{font-family:Helvetica,sans-serif; display: block; text-align: left; text-decoration: none; text-indent: 0%;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - """ - - def preprocess_html(self, soup): - return self.adeify_images(soup) - - def populate_article_metadata(self, article, soup, first): - h2 = soup.find('h2') - h2.replaceWith(h2.prettify() + '

By ' + - article.author + '

') diff --git a/recipes/schattenblick.recipe b/recipes/schattenblick.recipe deleted file mode 100644 index dd0b5dedb1..0000000000 --- a/recipes/schattenblick.recipe +++ /dev/null @@ -1,14 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1345802300(BasicNewsRecipe): - title = u'Online-Zeitung Schattenblick' - language = 'de' - __author__ = 'ThB' - publisher = u'MA-Verlag' - category = u'Nachrichten' - oldest_article = 7 - max_articles_per_feed = 100 - cover_url = 'http://www.schattenblick.de/mobi/rss/cover.jpg' - feeds = [(u'Schattenblick Tagesausgabe', - u'http://www.schattenblick.de/mobi/rss/rss.xml')] diff --git a/recipes/schwarzerpfeil.recipe b/recipes/schwarzerpfeil.recipe deleted file mode 100644 index aec7c85bd2..0000000000 --- a/recipes/schwarzerpfeil.recipe +++ /dev/null @@ -1,30 +0,0 @@ -# -*- mode: python; -*- -__license__ = 'GPL v3' - -''' -SchwarzerPfeil Calibre recipe. -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class Schwarzerpfeil(BasicNewsRecipe): - title = 'SchwarzerPfeil' - __author__ = 'tastytea' - description = 'Das partizipative Mag von und für die antiautoritäre Bewegung' - publication_type = 'magazine' - language = 'de' - timefmt = ' [%d. %B %Y]' - max_articles_per_feed = 100 - oldest_article = 30 - use_embedded_content = True - no_stylesheets = True - auto_cleanup = False - - feeds = [ - ('Artikel', 'https://schwarzerpfeil.de/feed/'), - ('Kommentare', 'https://schwarzerpfeil.de/comments/feed/') - ] - - def get_cover_url(self): - return "https://schwarzerpfeil.de/wp-content/uploads/2020/09/12-1.png" diff --git a/recipes/sciencedaily.recipe b/recipes/sciencedaily.recipe deleted file mode 100644 index ee2dae291e..0000000000 --- a/recipes/sciencedaily.recipe +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -__license__ = 'GPL v3' -__copyright__ = '2008-2017, Darko Miletic ' -''' -sciencedaily.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ScienceDaily(BasicNewsRecipe): - title = u'ScienceDaily' - __author__ = u'Darko Miletic' - title = 'ScienceDaily' - description = ('ScienceDaily is one of the Internet\'s most popular ' - 'science news web sites. Since starting in 1995, the ' - 'award-winning site has earned the loyalty of students, ' - 'researchers, healthcare professionals, government ' - 'agencies, educators and the general public around the ' - 'world. Now with more than 6 million monthly visitors ' - 'worldwide, ScienceDaily generates nearly 20 million ' - 'page views a month and is steadily growing in its ' - 'global audience.') - category = 'medicin, healthcare' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'en' - encoding = 'utf-8' - - # Feed are found here: https://www.sciencedaily.com/newsfeeds.htm - feeds = [ - ('Latest Science News', 'https://www.sciencedaily.com/rss/top.xml'), - ('All Top News', 'https://www.sciencedaily.com/rss/top/science.xml'), - ('Health News', 'https://www.sciencedaily.com/rss/top/health.xml'), - ('Technology News', 'https://www.sciencedaily.com/rss/top/technology.xml'), - ('Environment News', 'https://www.sciencedaily.com/rss/top/environment.xml'), - ('Society News', 'https://www.sciencedaily.com/rss/top/society.xml'), - ('Strange & Offbeat News', 'https://www.sciencedaily.com/rss/strange_offbeat.xml'), - ] diff --git a/recipes/seanhannity.recipe b/recipes/seanhannity.recipe deleted file mode 100644 index 01a5f45dcb..0000000000 --- a/recipes/seanhannity.recipe +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class SeanHannity(BasicNewsRecipe): - cover_url = 'http://www.hannity.com/images/misc_logo.gif' - title = u"Sean Hannity Show" - __author__ = 'Rob Lammert - rob.lammert[at]gmail.com' - description = u"Articles from Sean Hannity's website, www.hannity.com" - oldest_article = 7.0 - language = 'en' - max_articles_per_feed = 100 - recursions = 0 - encoding = 'utf8' - no_stylesheets = True - remove_javascript = True - - remove_tags = [ - dict(name='div', attrs={'id': [ - 'header', 'navsprite', 'topminibarad', 'headline_bar', 'shadow', 'footer']}), - dict(name='div', attrs={'class': 'rightcolumn'}), - dict(name='table', attrs={'id': 'audiobox'}), - dict(name='a', attrs={'title': ['Home', 'Shows', 'Guests', 'Photos']}), - dict(name='iframe') - ] - - feeds = [ - ('Content Feed', u'http://feeds.feedburner.com/TheSeanHannityShow-AllContent?format=xml') - ] - diff --git a/recipes/security_watch.recipe b/recipes/security_watch.recipe deleted file mode 100644 index 13c17f10b1..0000000000 --- a/recipes/security_watch.recipe +++ /dev/null @@ -1,19 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class SecurityWatch(BasicNewsRecipe): - title = u'securitywatch' - description = 'security news' - timefmt = ' [%d %b %Y]' - __author__ = 'Oliver Niesner' - no_stylesheets = True - oldest_article = 14 - max_articles_per_feed = 100 - use_embedded_content = False - language = 'en' - auto_cleanup = True - - feeds = [ - (u'securitywatch', - u'http://feeds.pcmag.com/Rss.aspx/SectionArticles?sectionId=28026') - ] diff --git a/recipes/serverside.recipe b/recipes/serverside.recipe deleted file mode 100644 index 1caa05e33d..0000000000 --- a/recipes/serverside.recipe +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Rick Kellogg' -''' -TheServerSide.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Engadget(BasicNewsRecipe): - title = u'TheServerSide.com' - __author__ = 'Rick Kellogg' - description = 'news' - language = 'en' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - - remove_tags = [dict(name='table', attrs={'class': ["head"]})] - - feeds = [(u'News', u'http://feeds.feedburner.com/techtarget/tsscom/home')] - - def get_article_url(self, article): - - url = article.get('guid', None) - - return url - - def print_version(self, url): - return url.replace('http://www.theserverside.com/news/thread.tss?thread_id=', 'http://www.theserverside.com/common/printthread.tss?thread_id=') diff --git a/recipes/sg_hu.recipe b/recipes/sg_hu.recipe deleted file mode 100644 index 68b027abb5..0000000000 --- a/recipes/sg_hu.recipe +++ /dev/null @@ -1,17 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class SGhu(BasicNewsRecipe): - title = u'SG.hu' - __author__ = 'davotibarna' - description = u'Informatika \xe9s Tudom\xe1ny' - language = 'hu' - oldest_article = 5 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - - feeds = [(u'SG.hu', u'http://www.sg.hu/plain/rss.xml')] - - def print_version(self, url): - return url.replace('cikkek/', 'printer.php?cid=') diff --git a/recipes/shacknews.recipe b/recipes/shacknews.recipe deleted file mode 100644 index acf6043496..0000000000 --- a/recipes/shacknews.recipe +++ /dev/null @@ -1,17 +0,0 @@ - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Shacknews(BasicNewsRecipe): - __author__ = 'Docbrown00' - __license__ = 'GPL v3' - title = u'Shacknews' - oldest_article = 7 - max_articles_per_feed = 100 - language = 'en' - no_stylesheets = True - auto_cleanup = True - - feeds = [ - (u'Latest News', u'http://www.shacknews.com/shackfeed.xml'), - ] diff --git a/recipes/shortlist.recipe b/recipes/shortlist.recipe deleted file mode 100644 index b8f7b9f772..0000000000 --- a/recipes/shortlist.recipe +++ /dev/null @@ -1,66 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1324663493(BasicNewsRecipe): - title = u'Shortlist' - description = 'Articles From Shortlist.com' - # I've set oldest article to 7 days as the website updates weekly - oldest_article = 8 - max_articles_per_feed = 20 - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True - ignore_duplicate_articles = {'title'} - - __author__ = 'Dave Asbury' - # last updated 7/10/12 - language = 'en_GB' - - def get_cover_url(self): - soup = self.index_to_soup('http://www.shortlist.com') - cov = soup.find(attrs={'width': '121'}) - # print '******** ',cov,' ***' - # cover_url = 'http://www.shortlist.com'+cov['src'] - cover_url = cov['src'] - return cover_url - - masthead_url = 'http://www.mediauk.com/logos/100/344096.png' - - preprocess_regexps = [ - (re.compile(r'…or.*?email to your friends.', re.IGNORECASE | re.DOTALL), lambda match: '')] - - keep_only_tags = [ - # dict(name='h1'), - dict(name='h2', attrs={'class': 'title'}), - dict(name='h3', atts={'class': 'subheading'}), - dict(attrs={'class': ['hero-static', 'stand-first']}), - dict(attrs={'class': 'hero-image'}), - dict(name='div', attrs={ - 'id': ['list', 'article', 'article alternate']}), - dict(name='div', attrs={'class': 'stand-first'}), - ] - remove_tags = [dict(name='h2', attrs={'class': 'graphic-header'}), - dict(attrs={ - 'id': ['share', 'twitter', 'facebook', 'digg', 'delicious', 'facebook-like']}), - dict(atts={'class': [ - 'related-content', 'related-content-item', 'related-content horizontal', 'more']}), - - ] - - remove_tags_after = [dict(name='p', attrs={'id': 'tags'}) - ] - - feeds = [ - # edit http://feed43.com/feed.html?name=3156308700147005 - # repeatable pattern =

{_}{%}{*}

- - (u'This Weeks Issue', u'http://feed43.com/5205766657404804.xml'), - (u'Home Page', u'http://feed43.com/3156308700147005.xml'), - (u'Cool Stuff', u'http://feed43.com/1557051772026706.xml'), - (u'Style', u'http://feed43.com/4168836374571502.xml'), - (u'Entertainment', u'http://feed43.com/4578504030588024.xml'), - - - ] diff --git a/recipes/sigma_live.recipe b/recipes/sigma_live.recipe deleted file mode 100644 index d34c43c551..0000000000 --- a/recipes/sigma_live.recipe +++ /dev/null @@ -1,14 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class sigmalive(BasicNewsRecipe): - title = u'SigmaLive' - __author__ = 'Stelios' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = True - category = 'news, CY' - description = 'Cypriot News' - language = 'el' - encoding = 'utf8' - feeds = [(u'sigmalive', u'http://sigmalive.com/rss/latest')] diff --git a/recipes/sign_on_sd.recipe b/recipes/sign_on_sd.recipe deleted file mode 100644 index aefe92b71b..0000000000 --- a/recipes/sign_on_sd.recipe +++ /dev/null @@ -1,54 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1315899507(BasicNewsRecipe): - title = u'Sign On San Diego' - __author__ = 'Jay Kindle' - description = 'Local news stories from The San Diego Union-Tribune; breaking news, business and technology, local and national sports coverage, entertainment news and reviews.' # noqa - publisher = 'Tribune Company' - category = 'news, politics, USA, San Diego, California, world' - oldest_article = 2 - max_articles_per_feed = 200 - timefmt = ' [%b %d, %Y]' - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'en' - auto_cleanup = True - remove_empty_feeds = True - publication_type = 'newspaper' - - feeds = [ - (u'Latest News', - u'http://www.sandiegouniontribune.com/latest/rss2.0.xml'), - (u'Business', - u'http://www.sandiegouniontribune.com/business/rss2.0.xml'), - (u'Politics', - u'http://www.sandiegouniontribune.com/news/politics/rss2.0.xml'), - (u'Immigration', - u'http://www.sandiegouniontribune.com/news/immigration/rss2.0.xml'), - (u'Courts', - u'http://www.sandiegouniontribune.com/news/public-safety/rss2.0.xml'), - (u'Education', - u'http://www.sandiegouniontribune.com/news/education/rss2.0.xml'), - (u'Sports', - u'http://www.sandiegouniontribune.com/sports/rss2.0.xml'), - (u'Chargers', - u'http://www.sandiegouniontribune.com/sports/chargers/rss2.0.xml'), - (u'Padres', - u'http://www.sandiegouniontribune.com/sports/padres/rss2.0.xml'), - (u'NFL', - u'http://www.sandiegouniontribune.com/sports/nfl/rss2.0.xml'), - (u'NBA', - u'http://www.sandiegouniontribune.com/sports/nba/rss2.0.xml'), - (u'Photos', - u'http://www.sandiegouniontribune.com/visuals/rss2.0.xml'), - (u'Entertainment', - u'http://www.sandiegouniontribune.com/entertainment/rss2.0.xml'), - (u'Books', - u'http://www.sandiegouniontribune.com/entertainment/books/rss2.0.xml'), - (u'Opinion', - u'http://www.sandiegouniontribune.com/opinion/rss2.0.xml'), - (u'Travel', - u'http://www.sandiegouniontribune.com/lifestyle/travel/rss2.0.xml'), - ] diff --git a/recipes/silicon_republic.recipe b/recipes/silicon_republic.recipe deleted file mode 100644 index 8623a18c35..0000000000 --- a/recipes/silicon_republic.recipe +++ /dev/null @@ -1,19 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011 Neil Grogan' -# -# Silicon Republic Recipe -# - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SiliconRepublic(BasicNewsRecipe): - title = u'Silicon Republic' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = u'Neil Grogan' - language = 'en_IE' - - remove_tags = [dict(attrs={'class': ['thumb', 'txt', 'compactbox', 'icons', 'catlist', 'catlistinner', 'taglist', 'taglistinner', 'social', 'also-in', 'also-in-inner', 'also-in-footer', 'zonek-dfp', 'paneladvert', 'rcadvert', 'panel', 'h2b']}), dict(id=['header', 'logo', 'header-right', 'sitesearch', 'rsslinks', 'topnav', 'topvideos', 'topvideos-list', 'topnews', 'topnews-list', 'slideshow', 'slides', 'compactheader', 'compactnews', 'compactfeatures', 'article-type', 'contactlinks-header', 'banner-zone-k-dfp', 'footer-related', 'directory-services', 'also-in-section', 'featuredrelated1', 'featuredrelated2', 'featuredrelated3', 'featuredrelated4', 'advert2-dfp']), dict(name=['script', 'style'])] # noqa - - feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')] diff --git a/recipes/singtao_daily.recipe b/recipes/singtao_daily.recipe deleted file mode 100644 index 3551a2799e..0000000000 --- a/recipes/singtao_daily.recipe +++ /dev/null @@ -1,79 +0,0 @@ -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe1278063072(BasicNewsRecipe): - title = u'Singtao Daily - Canada' - oldest_article = 7 - max_articles_per_feed = 100 - __author__ = 'rty' - description = 'Toronto Canada Chinese Newspaper' - publisher = 'news.singtao.ca' - category = 'Chinese, News, Canada' - remove_javascript = True - use_embedded_content = False - no_stylesheets = True - language = 'zh' - conversion_options = {'linearize_tables': True} - masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg' - extra_css = ''' - @font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\ - - body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}\ - - h1 {font-family: 'DroidFont', serif;}\ - - .articledescription {font-family: 'DroidFont', serif;} - ''' - keep_only_tags = [ - dict(name='div', attrs={'id': ['title', 'storybody']}), - dict(name='div', attrs={'class': 'content'}) - ] - - def parse_index(self): - feeds = [] - for title, url in [ - ('Editorial', - 'http://news.singtao.ca/toronto/editorial.html'), - ('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'), - 'http://news.singtao.ca/toronto/city.html'), - ('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'), - 'http://news.singtao.ca/toronto/canada.html'), - ('Entertainment', - 'http://news.singtao.ca/toronto/entertainment.html'), - ('World', - 'http://news.singtao.ca/toronto/world.html'), - ('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'), - 'http://news.singtao.ca/toronto/finance.html'), - ('Sports', 'http://news.singtao.ca/toronto/sports.html'), - ]: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - return feeds - - def parse_section(self, url): - soup = self.index_to_soup(url) - div = soup.find( - attrs={'class': ['newslist paddingL10T10', 'newslist3 paddingL10T10']}) - current_articles = [] - for li in div.findAll('li'): - a = li.find('a', href=True) - if a is None: - continue - title = self.tag_to_string(a) - url = a.get('href', False) - if not url or not title: - continue - if url.startswith('/'): - url = 'http://news.singtao.ca' + url - current_articles.append( - {'title': title, 'url': url, 'description': ''}) - - return current_articles - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll(width=True): - del item['width'] - return soup diff --git a/recipes/siol.recipe b/recipes/siol.recipe deleted file mode 100644 index c7d964cd9a..0000000000 --- a/recipes/siol.recipe +++ /dev/null @@ -1,58 +0,0 @@ -# coding: utf-8 -__license__ = 'GPL v3' -__copyright__ = '2010, BlonG' -''' -www.siol.si -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class Siol(BasicNewsRecipe): - title = u'Siol.net' - __author__ = u'BlonG' - description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos" - oldest_article = 3 - language = 'sl' - max_articles_per_feed = 20 - no_stylesheets = True - use_embedded_content = False - - cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg' - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' - - html2lrf_options = ['--base-font-size', '10'] - - keep_only_tags = [ - dict(name='div', attrs={'id': 'idContent'}), - ] - - remove_tags = [ - dict(name='span', attrs={'class': 'com1'}), - dict(name='div', attrs={'class': 'relation'}), - dict(name='p', attrs={'class': 'path'}), - dict(name='div', attrs={'class': 'clear_r'}), - dict(name='div', attrs={'id': 'appendix'}), - dict(name='div', attrs={'id': 'rail'}), - dict(name='div', attrs={'id': 'div_comments'}), - dict(name='div', attrs={'class': 'thumbs'}), - ] - - feeds = [ - - (u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija'), - (u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice'), - (u'EU', u'http://www.siol.net/rss.aspx?path=EU'), - (u'Svet', u'http://www.siol.net/rss.aspx?path=Svet'), - (u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo'), - (u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal'), - (u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi'), - (u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto'), - (u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija'), - (u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV') - ] diff --git a/recipes/sisainlive.recipe b/recipes/sisainlive.recipe deleted file mode 100644 index 63546cde63..0000000000 --- a/recipes/sisainlive.recipe +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -__copyright__ = '2015, Hoje Lee ' -''' -Profile to download SisaIN Live -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class SisaINLive(BasicNewsRecipe): - language = 'ko' - title = u'시사인 라이브' - description = u'시사인 라이브 기사' - __author__ = 'Hoje Lee' - oldest_article = 30 - max_articles_per_feed = 10 - auto_cleanup = True - """ - # manual cleanup - no_stylesheets = True - remove_javascript = True - - keep_only_tags = [ - dict(name='div', attrs ={'class':['View_Title']}), - dict(name='div', attrs ={'class':['View_Info']}), - dict(name='div', attrs ={'class':['View_Time']}), - dict(id='articleBody'), - ] - remove_tags = [ - dict(name='table', attrs ={'width':['320'], 'height':['265']}), - ] - """ - - feeds = [ - # (u'전체기사', 'http://www.sisainlive.com/rss.xml'), - (u'인기기사', 'http://www.sisainlive.com/rss/clickTop.xml'), - (u'커버스토리', 'http://www.sisainlive.com/rss/SRN121.xml'), - (u'특집', 'http://www.sisainlive.com/rss/SRN122.xml'), - (u'정치', 'http://www.sisainlive.com/rss/S1N15.xml'), - (u'경제', 'http://www.sisainlive.com/rss/S1N16.xml'), - (u'사회', 'http://www.sisainlive.com/rss/S1N17.xml'), - (u'문화', 'http://www.sisainlive.com/rss/S1N18.xml'), - (u'국제.한반도', 'http://www.sisainlive.com/rss/S1N4.xml'), - (u'실용.과학', 'http://www.sisainlive.com/rss/S1N6.xml'), - (u'휴먼&휴', 'http://www.sisainlive.com/rss/S1N19.xml'), - (u'인터뷰.오피니언', 'http://www.sisainlive.com/rss/S1N5.xml'), - (u'사진.만화', 'http://www.sisainlive.com/rss/S1N7.xml'), - (u'별책부록', 'http://www.sisainlive.com/rss/S1N14.xml'), - ] diff --git a/recipes/sizinti_derigisi.recipe b/recipes/sizinti_derigisi.recipe deleted file mode 100644 index ebf8e041dd..0000000000 --- a/recipes/sizinti_derigisi.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class TodaysZaman_en(BasicNewsRecipe): - title = u'Sızıntı Dergisi' - __author__ = u'thomass' - description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features' # noqa - oldest_article = 30 - max_articles_per_feed = 80 - no_stylesheets = True - encoding = 'utf-8' - category = 'dergi, ilim, kültür, bilim,Türkçe' - language = 'tr' - publication_type = 'magazine' - - cover_img_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg' - masthead_url = 'http://www.sizinti.com.tr/images/sizintiprint.jpg' - remove_tags_before = dict(id='content-right') - - feeds = [ - (u'Sızıntı', u'http://www.sizinti.com.tr/rss'), - ] - - # def preprocess_html(self, soup): - # return self.adeify_images(soup) - # def print_version(self, url): #there is a problem caused by table format - # return - # url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', - # 'http://www.todayszaman.com/newsDetail_openPrintPage.action?') diff --git a/recipes/skanderborglokalavisen_dk.recipe b/recipes/skanderborglokalavisen_dk.recipe deleted file mode 100644 index 832edb1cd7..0000000000 --- a/recipes/skanderborglokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Skanderborg -''' - - -class SkanderborgLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Skanderborg' - description = 'Lokale og regionale nyheder, sport, kultur fra Skanderborg og omegn på skanderborg.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Skanderborg', 'http://skanderborg.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/skylife.recipe b/recipes/skylife.recipe deleted file mode 100644 index 344363cea4..0000000000 --- a/recipes/skylife.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - - -class THY (BasicNewsRecipe): - - title = u'Skylife' - __author__ = u'thomass' - description = ' Türk Hava Yollarının yayınladığı aylık kültür dergisi (Fotoğrafları da içermesini isterseniz keep_only_tag''da belirttiğim kodu da ekleyin) ' # noqa - oldest_article = 32 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - publisher = 'thomass' - category = 'genel kültür, gezi,Türkçe' - language = 'tr' - publication_type = 'magazine' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - # Fotoğrafları da eklemek için: dict(name='div', - # attrs={'id':['divResimler']}) - keep_only_tags = [dict(name='h3', attrs={'id': ['hpbaslik']}), dict( - name='p', attrs={'id': ['pyayin', 'hspot', 'picerik']})] - masthead_url = 'http://www.turkishairlines.com/static/img/skylife/logo.png' - remove_empty_feeds = True - remove_attributes = ['width', 'height'] - - feeds = [(u'SKYLIFE', u'http://feed43.com/7783278414103376.xml')] diff --git a/recipes/slate_star_codex.recipe b/recipes/slate_star_codex.recipe deleted file mode 100644 index 6f0bde61da..0000000000 --- a/recipes/slate_star_codex.recipe +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - -__license__ = 'GPL v3' -__copyright__ = '2015, Ned Letcher ' - - -""" -calibre recipe for Slate Star Codex. -""" - - -class SlateStarCodex(BasicNewsRecipe): - title = u'Slate Star Codex' - description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING' - __author__ = 'Ned Letcher' - max_articles_per_feed = 20 - language = 'en' - encoding = 'utf-8' - no_stylesheets = True - # alternative candidate for keep_only_tags: - # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}] - keep_only_tags = [ - dict(name='div', attrs={'class': re.compile(r'\bpost\b')})] - remove_tags = [ - dict(name='div', attrs={'class': re.compile(r'\bsharedaddy\b')})] - - def get_archived_posts(self): - soup = self.index_to_soup('http://slatestarcodex.com/archives/') - entries = soup.findAll(attrs={'class': 'sya_postcontent'}) - - posts = [] - for entry in entries: - atag = entry.find('a') - url = atag['href'] - post = { - 'title': atag.contents[0], - 'url': url, - 'date': "-".join(url.strip('/').split('/')[-4:-1]), - } - posts.append(post) - return posts - - def parse_index(self): - posts = self.get_archived_posts() - return [[self.title, posts]] diff --git a/recipes/slovo.recipe b/recipes/slovo.recipe deleted file mode 100644 index b1c42b2459..0000000000 --- a/recipes/slovo.recipe +++ /dev/null @@ -1,41 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SlovoRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'Abelturd' - language = 'sk' - version = 1 - - title = u'SLOVO' - publisher = u'' - category = u'News, Newspaper' - description = u'Politicko-spolo\u010densk\xfd t\xfd\u017edenn\xedk' - encoding = 'Windows-1250' - - oldest_article = 1 - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - - no_stylesheets = True - remove_javascript = True - - feeds = [] - feeds.append((u'V\u0161etky \u010dl\xe1nky', - u'http://www.noveslovo.sk/rss.asp')) - - keep_only_tags = [] - remove_tags = [] - - preprocess_regexps = [ - (re.compile(r'', re.DOTALL | re.IGNORECASE), - lambda match: ''), - ] - - def print_version(self, url): - m = re.search('(?<=id=)[0-9]*', url) - - return u'http://www.noveslovo.sk/clanoktlac.asp?id=' + str(m.group(0)) diff --git a/recipes/sme.recipe b/recipes/sme.recipe deleted file mode 100644 index bd636b0120..0000000000 --- a/recipes/sme.recipe +++ /dev/null @@ -1,79 +0,0 @@ -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SmeRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'sk' - version = 1 - - title = u'SME' - publisher = u'' - category = u'News, Newspaper' - description = u'News from Slovakia' - - oldest_article = 1 - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - - no_stylesheets = True - remove_javascript = True - - # Feeds from: http://rss.sme.sk/ - feeds = [] - feeds.append( - (u'Tituln\u00E1 strana', u'http://rss.sme.sk/rss/rss.asp?id=frontpage')) - feeds.append((u'Naj\u010D\u00EDtanej\u0161ie za 4 hodiny', - u'http://rss.sme.sk/rss/rss.asp?id=smenajcit4')) - feeds.append((u'Naj\u010D\u00EDtanej\u0161ie za 24 hod\u00EDn', - u'http://rss.sme.sk/rss/rss.asp?id=smenajcit24')) - feeds.append( - (u'Z domova', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zdom')) - feeds.append((u'Zahrani\u010Die', - u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zahr')) - feeds.append((u'Z domova + zahrani\u010Die', - u'http://rss.sme.sk/rss/rss.asp?sek=smeonline')) - feeds.append((u'Ekonomika', u'http://rss.sme.sk/rss/rss.asp?sek=ekon')) - feeds.append((u'Kult\u00FAra', u'http://rss.sme.sk/rss/rss.asp?sek=kult')) - feeds.append( - (u'Koment\u00E1re', u'http://rss.sme.sk/rss/rss.asp?sek=koment')) - feeds.append((u'Volby', u'http://rss.sme.sk/rss/rss.asp?sek=eVolby')) - feeds.append((u'\u0160port', u'http://rss.sme.sk/rss/rss.asp?sek=sport')) - feeds.append((u'Futbal', u'http://rss.sme.sk/rss/rss.asp?sek=futbal')) - feeds.append((u'Hokej', u'http://rss.sme.sk/rss/rss.asp?sek=hokej')) - feeds.append((u'Po\u010D\u00EDta\u010De', - u'http://rss.sme.sk/rss/rss.asp?sek=pocit')) - feeds.append((u'Mobil', u'http://rss.sme.sk/rss/rss.asp?sek=mobil')) - feeds.append((u'Veda', u'http://rss.sme.sk/rss/rss.asp?sek=veda')) - feeds.append((u'Natankuj', u'http://rss.sme.sk/rss/rss.asp?sek=natankuj')) - feeds.append((u'Auto', u'http://rss.sme.sk/rss/rss.asp?sek=auto')) - feeds.append( - (u'Dom\u00E1cnos\u0165', u'http://rss.sme.sk/rss/rss.asp?sek=domac')) - feeds.append((u'\u017Dena', u'http://rss.sme.sk/rss/rss.asp?sek=zena')) - feeds.append((u'Z\u00E1bava', u'http://rss.sme.sk/rss/rss.asp?sek=zabava')) - feeds.append((u'Hry', u'http://rss.sme.sk/rss/rss.asp?sek=hry')) - - keep_only_tags = [] - keep_only_tags.append(dict(name='div', attrs={'id': 'contenth'})) - keep_only_tags.append(dict(name='div', attrs={'class': 'articlec col'})) - - remove_tags = [] - remove_tags.append( - dict(name='div', attrs={'id': re.compile('smeplayer.*')})) - - remove_tags_after = [dict(name='p', attrs={'class': 'autor_line'})] - - extra_css = ''' - @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)} - body {font-family: sans1, serif1;} - ''' - - def print_version(self, url): - parts = url.split('/') - id = parts[4] - - return u'http://korzar.sme.sk/clanok_tlac.asp?cl=' + str(id) diff --git a/recipes/smilezilla.recipe b/recipes/smilezilla.recipe deleted file mode 100644 index f483715515..0000000000 --- a/recipes/smilezilla.recipe +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import os -import re - -from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.web.feeds.news import BasicNewsRecipe - - -class SmileZilla(BasicNewsRecipe): - - title = 'SmileZilla' - language = 'en' - __author__ = "Will" - JOKES_INDEX = 'http://www.smilezilla.com/joke.do' - STORIES_INDEX = 'http://www.smilezilla.com/story.do' - description = 'Daily Jokes and funny stoires' - oldest_article = 1 - no_stylesheets = True - encoding = 'utf-8' - - remove_tags = [dict(name='table')] - - def _get_entry(self, soup): - return soup.find('form', attrs={'name': 'contentForm'}) - - def _get_section_title(self, soup): - title_div = soup.find('div', attrs={'class': 'title'}) - return self.tag_to_string(title_div).strip() - - def parse_index(self): - self.tdir = PersistentTemporaryDirectory() - - def as_soup(url): - soup = self.index_to_soup(url) - for img in soup.findAll('img', src=True): - if img['src'].startswith('/'): - img['src'] = 'http://www.smilezilla.com' + img['src'] - return soup - - articles = [] - - soup = as_soup(self.JOKES_INDEX) - jokes_entry = self._get_entry(soup) - section_title = self._get_section_title(soup) - todays_jokes = [] - for i, text in enumerate(re.findall(r'(.+?)') - f.write(text.encode('utf-8')) - todays_jokes.append({'title': title, 'url': 'file:///' + f.name}) - articles.append((section_title, todays_jokes)) - - soup = as_soup(self.STORIES_INDEX) - entry = self._get_entry(soup) - section_title = self._get_section_title(soup) - - todays_stories = [] - for i, text in enumerate(re.findall(r'(.+?)') - f.write(text.encode('utf-8')) - todays_stories.append({'title': title, 'url': 'file:///' + f.name}) - articles.append((section_title, todays_stories)) - - return articles diff --git a/recipes/sn_dk.recipe b/recipes/sn_dk.recipe deleted file mode 100644 index e8ac9a301e..0000000000 --- a/recipes/sn_dk.recipe +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -sn.dk -''' - - -class Sn_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'sn.dk' - description = 'Sjællandske medier' - category = 'newspaper, news, localnews, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Nyheder', 'http://sn.dk/rss'), - - ] - diff --git a/recipes/snopes.recipe b/recipes/snopes.recipe deleted file mode 100644 index fe16b534a0..0000000000 --- a/recipes/snopes.recipe +++ /dev/null @@ -1,43 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Starson17' -''' -snopes.com -''' -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class Snopes(BasicNewsRecipe): - title = 'Snopes' - __author__ = 'Starson17' - description = 'Urban Legends' - oldest_article = 21 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publisher = 'Snopes' - category = 'news, ' - language = 'en' - publication_type = 'newsportal' - remove_javascript = True - no_stylesheets = True - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - - keep_only_tags = [ - dict(name='h1'), - dict(name='div', attrs={'class': ['article_text']}), - ] - - feeds = [ - ('Snopes', 'http://www.snopes.com/info/whatsnew.xml'), - ] - - extra_css = ''' - h1{font-family:Trebuchet MS,Bookman Old Style,Arial;color:#75b570} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Arial,Helvetica,sans-serif;font-size:small;} - ''' diff --git a/recipes/socialdiva.recipe b/recipes/socialdiva.recipe deleted file mode 100644 index 9ad6997361..0000000000 --- a/recipes/socialdiva.recipe +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011' -''' -socialdiva.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SocialDiva(BasicNewsRecipe): - title = u'Social Diva' - __author__ = u'Silviu Cotoara' - description = u'When in doubt, wear red' - publisher = 'Social Diva' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Femei' - encoding = 'utf-8' - cover_url = 'http://www.socialdiva.ro/images/logo.png' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'col-alpha mt5 content_articol'}), - dict(name='div', attrs={'class': 'mt5'}) - ] - - remove_tags = [ - dict(name='a', attrs={'class': ['comments float-left scroll mt5']}), - dict(name='a', attrs={'class': ['comments float-left scroll']}), - dict(name='div', attrs={ - 'class': ['rating-container relative float-left']}), - dict(name='div', attrs={'class': ['float-right social_articol']}) - ] - - remove_tags_after = [ - dict(name='a', attrs={'class': ['comments float-left scroll mt5']}) - ] - - feeds = [ - (u'Feeds', u'http://www.socialdiva.ro/rss.html') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/soenderborglokalavisen_dk.recipe b/recipes/soenderborglokalavisen_dk.recipe deleted file mode 100644 index 42507b262b..0000000000 --- a/recipes/soenderborglokalavisen_dk.recipe +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Lokalavisen Sønderborg -''' - - -class SoenderborgLokalavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Lokalavisen Sønderborg' - description = 'Lokale og regionale nyheder, sport, kultur fra Sønderborg og omegn på soenderborg.lokalavisen.dk' - category = 'newspaper, news, localnews, sport, culture, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - feeds = [ - ('Seneste nyt fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestenytrss'), - ('Seneste lokale nyheder fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestelokalenyhederrss'), - ('Seneste sport fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestesportrss'), - ('Seneste 112 nyheder fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/seneste112rss'), - ('Seneste kultur nyheder fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestekulturrss'), - ('Seneste læserbreve fra Lokalavisen Sønderborg', 'http://soenderborg.lokalavisen.dk/section/senestelaeserbreverss'), - - ] - diff --git a/recipes/soldiers.recipe b/recipes/soldiers.recipe deleted file mode 100644 index d5e9f526c2..0000000000 --- a/recipes/soldiers.recipe +++ /dev/null @@ -1,42 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.army.mil/soldiers/ -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Soldiers(BasicNewsRecipe): - title = 'Soldiers' - __author__ = 'Darko Miletic' - description = 'The Official U.S. Army Magazine' - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - auto_cleanup = True - auto_cleanup_keep = '//div[@id="mediaWrapper"]' - simultaneous_downloads = 1 - delay = 4 - max_connections = 1 - encoding = 'utf-8' - publisher = 'U.S. Army' - category = 'news, politics, war, weapons' - language = 'en' - INDEX = 'http://www.army.mil/soldiers/' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/')] - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('img', attrs={'alt': 'Current Magazine Cover'}) - if cover_item: - cover_url = cover_item['src'] - return cover_url diff --git a/recipes/something_awful.recipe b/recipes/something_awful.recipe deleted file mode 100644 index cb585261d4..0000000000 --- a/recipes/something_awful.recipe +++ /dev/null @@ -1,89 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class SomethingAwfulRecipe(BasicNewsRecipe): - title = 'Something Awful' - __author__ = 'atordo' - description = 'The Internet Makes You Stupid' - cover_url = 'http://i.somethingawful.com/core/head-logo-bluegren.png' - oldest_article = 15 - max_articles_per_feed = 50 - auto_cleanup = False - no_stylesheets = True - remove_javascript = True - language = 'en' - use_embedded_content = False - remove_empty_feeds = True - publication_type = 'magazine' - reverse_article_order = True - - recursions = 1 - match_regexps = [r'^http://www.somethingawful.com/.+/.+/\d{1,2}/$'] - - remove_attributes = ['align', 'alt', 'valign'] - - keep_only_tags = [ - dict(name='div', attrs={'class': 'article_head'}), dict(name='div', attrs={ - 'class': 'organ article'}), dict(name='ul', attrs={'class': 'pager'}) - ] - - extra_css = ''' - .author{font-size:small} .date{font-size:small} - .byline{font-size:small} .font_big{font-size:large} - .compat5{font-weight:bold} .accentbox{background-color:#E3E3E3; border:solid black} - img{margin-bottom:0.4em; display:block; margin-left: auto; margin-right:auto} - ''' - - feeds = [ - - ('Photoshop Phriday', 'http://www.somethingawful.com/rss/photoshop-phriday.rss.xml'), - ('Comedy Goldmine', 'http://www.somethingawful.com/rss/comedy-goldmine.rss.xml'), - # ('The Flash Tub', 'http://www.somethingawful.com/rss/flash-tub.rss.xml') - # ('Downloads', 'http://www.somethingawful.com/rss/downloads.rss.xml') - # ('AwfulVision', 'http://www.somethingawful.com/rss/awfulvision.rss.xml') - ('Awful Link of the Day', 'http://www.somethingawful.com/rss/awful-links.rss.xml'), - ('Fake Something Awfuls', 'http://www.somethingawful.com/rss/fake-something-awful.rss.xml'), - ('The Barbarian\'s Dojo', 'http://www.somethingawful.com/rss/steve-sumner.rss.xml'), - ('The Great Goon Database', 'http://www.somethingawful.com/rss/great-goon-database.rss.xml'), - ('Livejournal Theater', 'http://www.somethingawful.com/rss/livejournal-theater.rss.xml'), - ('Joystick Token Healthpack', 'http://www.somethingawful.com/rss/token-healthpack.rss.xml'), - ('Webcam Ward', 'http://www.somethingawful.com/rss/webcam-ward.rss.xml'), - ('Features / Articles', 'http://www.somethingawful.com/rss/feature-articles.rss.xml'), - ('Guides', 'http://www.somethingawful.com/rss/guides.rss.xml'), - ('Legal Threats', 'http://www.somethingawful.com/rss/legal-threats.rss.xml'), - ('Pranks [ICQ]', 'http://www.somethingawful.com/rss/icq-pranks.rss.xml'), - ('State Og', 'http://www.somethingawful.com/rss/state-og.rss.xml'), - ('Everquest', 'http://www.somethingawful.com/rss/everquest.rss.xml'), - ('Pranks [Email]', 'http://www.somethingawful.com/rss/email-pranks.rss.xml'), - ('The Weekend Web', 'http://www.somethingawful.com/rss/weekend-web.rss.xml'), - ('Daily Dirt', 'http://www.somethingawful.com/rss/daily-dirt.rss.xml'), - ('The Art of Warcraft', 'http://www.somethingawful.com/rss/art-of-warcraft.rss.xml'), - ('Video Game Article', 'http://www.somethingawful.com/rss/video-game-article.rss.xml'), - ('The Awful Movie Database', 'http://www.somethingawful.com/rss/awful-movie-database.rss.xml'), - ('Pregame Wrapup', 'http://www.somethingawful.com/rss/pregame-wrapup.rss.xml'), - ('Second Life Safari', 'http://www.somethingawful.com/rss/second-life-safari.rss.xml'), - ('The Hogosphere', 'http://www.somethingawful.com/rss/hogosphere.rss.xml'), - ('Front Page News', 'http://www.somethingawful.com/rss/news.rss.xml'), - ('Forum Friday\'s Monday', 'http://www.somethingawful.com/rss/forum-fridays.rss.xml'), - ('Cliff Yablonski Hates You', 'http://www.somethingawful.com/rss/cliff-yablonski.rss.xml'), - ('Manifestos From the Internet', 'http://www.somethingawful.com/rss/manifestos-from-internet.rss.xml'), - ('Johnston Checks In', 'http://www.somethingawful.com/rss/levi-johnston.rss.xml'), - ('Twitter Tuesday', 'http://www.somethingawful.com/rss/twitter-tuesday.rss.xml'), - ('Music Article', 'http://www.somethingawful.com/rss/music-article.rss.xml'), - ('The Everdraed Showcase', 'http://www.somethingawful.com/rss/everdraed-showcase.xml'), - ('Reviews [Games]', 'http://www.somethingawful.com/rss/game-reviews.rss.xml'), - ('Reviews [Movies]', 'http://www.somethingawful.com/rss/movie-reviews.rss.xml'), - ('Rom Pit', 'http://www.somethingawful.com/rss/rom-pit.rss.xml'), - ('Truth Media [Reviews]', 'http://www.somethingawful.com/rss/truth-media-reviews.rss.xml'), - ('Truth Media [Flames]', 'http://www.somethingawful.com/rss/truth-media-flames.rss.xml'), - ('Awful Anime', 'http://www.somethingawful.com/rss/hentai-game-reviews.rss.xml'), - ('The Horrors of Pornography', 'http://www.somethingawful.com/rss/horrors-of-porn.rss.xml'), - ('Your Band Sucks', 'http://www.somethingawful.com/rss/your-band-sucks.rss.xml'), - ('Fashion SWAT', 'http://www.somethingawful.com/rss/fashion-swat.rss.xml'), - ('MMO Roulette', 'http://www.somethingawful.com/rss/mmo-roulette.rss.xml'), - ('The Most Awful', 'http://www.somethingawful.com/rss/most-awful.rss.xml'), - ('Garbage Day', 'http://www.somethingawful.com/rss/garbage-day.rss.xml'), - ('WTF, D&D!?', 'http://www.somethingawful.com/rss/dungeons-and-dragons.rss.xml'), - ('Current Releases', 'http://www.somethingawful.com/rss/current-movie-reviews.rss.xml'), - ('Awful Things for Sale', 'http://www.somethingawful.com/rss/awful-things-sale.xml') - ] diff --git a/recipes/sondagsavisen_dk.recipe b/recipes/sondagsavisen_dk.recipe deleted file mode 100644 index 4b26f3eebc..0000000000 --- a/recipes/sondagsavisen_dk.recipe +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# https://manual.calibre-ebook.com/news_recipe.html -from __future__ import absolute_import, division, print_function, unicode_literals - -from calibre.web.feeds.news import BasicNewsRecipe - -''' -Søndagsavisen.dk -''' - - -class Sondagsavisen_dk(BasicNewsRecipe): - __author__ = 'CoderAllan.github.com' - title = 'Søndagsavisen.dk' - description = 'Danmarks mest læste avis' - category = 'newspaper, news, localnews, home, health, decoration, Denmark' - oldest_article = 7 - max_articles_per_feed = 50 - auto_cleanup = True - language = 'da' - - # Feed are found here: http://www.sondagsavisen.dk/ - feeds = [ - ('Søndagsavisen.dk', 'http://www.sondagsavisen.dk/feed'), - ] diff --git a/recipes/southernstar.recipe b/recipes/southernstar.recipe deleted file mode 100644 index dda6948750..0000000000 --- a/recipes/southernstar.recipe +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2012, watou' -''' -southernstar.ie -''' -import codecs -import os -import re -import tempfile - -from calibre.ebooks.BeautifulSoup import NavigableString, Tag -from calibre.web.feeds.news import BasicNewsRecipe - - -def new_tag(soup, name, attrs=()): - impl = getattr(soup, 'new_tag', None) - if impl is not None: - return impl(name, attrs=dict(attrs)) - return Tag(soup, name, attrs=attrs or None) - - -class TheSouthernStar(BasicNewsRecipe): - - title = 'The Southern Star' - __author__ = 'watou' - description = 'West Cork\'s leading news and information provider since 1889' - NEWS_INDEX = 'http://www.southernstar.ie/news.php' - LOCAL_NOTES = 'http://www.southernstar.ie/localnotes.php' - SPORT_INDEX = 'http://www.southernstar.ie/sport.php' - CLASSIFIEDS = 'http://www.southernstar.ie/classifieds.php' - language = 'en_IE' - encoding = 'cp1252' - - publication_type = 'newspaper' - masthead_url = 'http://www.southernstar.ie/images/logo.gif' - remove_tags_before = dict(name='div', attrs={'class': 'article'}) - remove_tags_after = dict(name='div', attrs={'class': 'article'}) - remove_tags = [dict(name='div', attrs={'style': 'width:300px; position:relative'}), - dict(name='form'), - dict(name='div', attrs={'class': 'endpanel'})] - no_stylesheets = True - tempfiles = [] - pubdate = '' - - preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] - - def parse_index(self): - feeds = [] - seen_titles = set() - - articles = self.fetch_ss_articles(self.NEWS_INDEX, seen_titles) - if articles: - feeds.append(('News', articles)) - - articles = self.fetch_ss_notes(self.LOCAL_NOTES) - if articles: - feeds.append(('Local Notes', articles)) - - articles = self.fetch_ss_articles(self.SPORT_INDEX, seen_titles) - if articles: - feeds.append(('Sport', articles)) - - articles = self.fetch_ss_notes(self.CLASSIFIEDS) - if articles: - feeds.append(('Classifieds', articles)) - - return feeds - - def fetch_ss_articles(self, index, seen_titles): - articles = [] - soup = self.index_to_soup(index) - ts = soup.find('div', {'class': 'article'}) - ds = self.tag_to_string(ts.find('strong')) - self.pubdate = ' [' + ds + ']' - self.timefmt = ' [%s]' % ds - - for post in ts.findAll('h1'): - a = post.find('a', href=True) - title = self.tag_to_string(a) - if title in seen_titles: - continue - seen_titles.add(title) - url = a['href'] - if url.startswith('article'): - url = 'http://www.southernstar.ie/' + url - self.log('\tFound article:', title, 'at', url) - p = post.findNextSibling('p') - desc = None - if p is not None: - desc = str(p) - articles.append({'title': title, 'url': url, 'description': desc, - 'date': self.pubdate}) - - return articles - - def fetch_ss_notes(self, page): - articles = [] - - soup = self.index_to_soup(page) - ts = soup.find('div', {'class': 'content'}) - for post in ts.findAll('h1'): - title = self.tag_to_string(post) - self.log('\tFound note:', title) - f = tempfile.NamedTemporaryFile(suffix='.html', delete=False) - f.close() - f = codecs.open(f.name, 'w+b', self.encoding, 'replace') - url = "file://" + f.name - f.write(u'

' + title + '

') - f.write(str(post.findNextSibling('p'))) - f.write(u'') - self.log('\tWrote note to', f.name) - f.close() - self.tempfiles.append(f) - articles.append({'title': title, 'url': url, 'date': self.pubdate}) - - return articles - - def postprocess_html(self, soup, first): - for table in soup.findAll('table', align='right'): - img = table.find('img') - if img is not None: - img.extract() - caption = self.tag_to_string(table).strip() - div = new_tag(soup, 'div') - div['style'] = 'text-align:center' - div.insert(0, img) - div.insert(1, new_tag(soup, 'br')) - if caption: - div.insert(2, NavigableString(caption)) - table.replaceWith(div) - - return soup - - def image_url_processor(self, baseurl, url): - return url.replace(' ', '%20') - - def cleanup(self): - self.log('cleaning up') - for f in self.tempfiles: - os.unlink(f.name) - self.tempfiles = [] diff --git a/recipes/spin_magazine.recipe b/recipes/spin_magazine.recipe deleted file mode 100644 index 8ff4371cc1..0000000000 --- a/recipes/spin_magazine.recipe +++ /dev/null @@ -1,18 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1296179411(BasicNewsRecipe): - title = u'SPIN Magzine' - __author__ = 'Quistopher' - language = 'en' - oldest_article = 7 - max_articles_per_feed = 100 - - feeds = [ - (u'Daily Noise Blog | SPIN.com', u'http://www.spin.com/blog/feed'), - (u'It Happened Last Night | SPIN.com', - u'http://www.spin.com/it-happened-last-night/feed'), - (u'Album Reviews | SPIN.com', - u'http://www.spin.com/album-reviews/feed') - - ] diff --git a/recipes/sportowefakty.recipe b/recipes/sportowefakty.recipe deleted file mode 100644 index fe613235a6..0000000000 --- a/recipes/sportowefakty.recipe +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' - -import re - -from calibre.web.feeds.news import BasicNewsRecipe - - -class sportowefakty(BasicNewsRecipe): - title = u'SportoweFakty' - __author__ = 'Artur Stachecki , Tomasz Długosz ' - language = 'pl' - description = u'Najważniejsze informacje sportowe z kraju i ze świata, relacje, komentarze, wywiady, zdjęcia!' - oldest_article = 1 - masthead_url = 'http://www.sportowefakty.pl/images/logo.png' - max_articles_per_feed = 100 - simultaneous_downloads = 5 - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - ignore_duplicate_articles = {'title', 'url'} - - keep_only_tags = [dict(attrs={'class': 'box-article'})] - remove_tags = [] - remove_tags.append(dict(attrs={'class': re.compile(r'^newsStream')})) - remove_tags.append(dict(attrs={'target': '_blank'})) - - feeds = [ - (u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'), - (u'Koszykówka', u'http://www.sportowefakty.pl/koszykowka/index.rss'), - (u'Żużel', u'http://www.sportowefakty.pl/zuzel/index.rss'), - (u'Siatkówka', u'http://www.sportowefakty.pl/siatkowka/index.rss'), - (u'Zimowe', u'http://www.sportowefakty.pl/zimowe/index.rss'), - (u'Hokej', u'http://www.sportowefakty.pl/hokej/index.rss'), - (u'Moto', u'http://www.sportowefakty.pl/moto/index.rss'), - (u'Tenis', u'http://www.sportowefakty.pl/tenis/index.rss') - ] - - def get_article_url(self, article): - link = article.get('link', None) - if 'utm_source' in link: - return link.split('?utm')[0] - else: - return link - - def print_version(self, url): - print_url = url + '/drukuj' - return print_url - - def preprocess_html(self, soup): - head = soup.find('h1') - if 'Fotorelacja' in self.tag_to_string(head): - return None - else: - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup diff --git a/recipes/sporza_be.recipe b/recipes/sporza_be.recipe deleted file mode 100644 index ae01107aba..0000000000 --- a/recipes/sporza_be.recipe +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Sporzabe(BasicNewsRecipe): - title = u'Sporza.be' - __author__ = u'erkfuizfeuadjfjzefzfuzeff' - description = u'Sport news from Belgium in Dutch' - oldest_article = 7 - language = 'nl_BE' - - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - - keep_only_tags = [ - dict(name='title'), dict(name='div', attrs={'id': 'intro'}), dict(name='h3'), - dict(name='h1'), dict(name='span', attrs={'class': 'media_holder'}), - dict(name='div', attrs={'class': 'divider image'}), - dict(name='div', attrs={'class': 'paragraph'}) - ] - - remove_tags = [] - - feeds = [ - ( - u'Voetbal', u'http://sporza.be/cm/sporza/voetbal?mode=atom&action=submit' - ), - ( - u'Wielrennen', - u'http://sporza.be/cm/sporza/wielrennen?mode=atom&action=submit' - ), - ( - u'Tennis', u'http://sporza.be/cm/sporza/tennis?mode=atom&action=submit' - ), - ( - u'Auto en Motor', - u'http://sporza.be/cm/sporza/auto_motor?mode=atom&action=submit' - ), - ( - u'Atletiek', - u'http://sporza.be/cm/sporza/atletiek?mode=atom&action=submit' - ), - ( - u'Zaal', u'http://sporza.be/cm/sporza/zaal?mode=atom&action=submit' - ), - ( - u'Ander nieuws', - u'http://sporza.be/cm/sporza/ander_nieuws?mode=atom&action=submit' - ) - ] diff --git a/recipes/rian_eng.recipe b/recipes/sputnik.recipe similarity index 57% rename from recipes/rian_eng.recipe rename to recipes/sputnik.recipe index ac40504ee6..f11c48da5c 100644 --- a/recipes/rian_eng.recipe +++ b/recipes/sputnik.recipe @@ -1,4 +1,4 @@ - +#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic ' ''' @@ -9,28 +9,21 @@ from calibre.web.feeds.news import BasicNewsRecipe class Ria_eng(BasicNewsRecipe): - title = 'Ria Novosti' + title = 'Sputnik News' __author__ = 'Darko Miletic' description = 'News from Russia in English' language = 'en_RU' - publisher = 'en.rian.ru' category = 'news, politics, Russia' oldest_article = 3 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'utf-8' + auto_cleanup = True conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } - keep_only_tags = [dict(name='div', attrs={'class': 'article'})] - remove_tags = [ - dict(name=['object', 'link', 'iframe', 'base']), dict(name='div', attrs={'class': [ - 'related', 'mmban', 'view-story']}), dict(name='span', attrs={'class': 'copyright'}) - ] - remove_tags_after = dict(name='div', attrs={'class': 'text'}) - - feeds = [(u'Online news', u'http://en.rian.ru/export/rss2/archive/index.xml')] + feeds = [(u'News', u'https://sputnikglobe.com/export/rss2/archive/index.xml')] diff --git a/recipes/stamgasten.recipe b/recipes/stamgasten.recipe deleted file mode 100644 index c101bf5ad8..0000000000 --- a/recipes/stamgasten.recipe +++ /dev/null @@ -1,20 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class AdvancedUserRecipe1347706704(BasicNewsRecipe): - title = u'Stamgasten' - __author__ = u'DrMerry' - description = u'Stamgasten de populaire strip van Toon van Driel (http://www.toonvandriel.nl)' - language = u'nl' - oldest_article = 7 - max_articles_per_feed = 100 - auto_cleanup = False - cover_url = 'http://shop.toonvandriel.nl/productimg.php?type=canvas&id=15&size=large' - no_stylesheets = True - remove_javascript = True - remove_empty_feeds = True - remove_tags_before = dict(id='title') - remove_tags_after = dict(attrs={'class': 'entry-content rich-content'}) - extra_css = 'img{border:0;padding:0;margin:0;width:100%}' - - feeds = [(u'Stamgasten', u'http://toonvandriel.nl/feed/')] diff --git a/recipes/standardmoney.recipe b/recipes/standardmoney.recipe deleted file mode 100644 index f28ee28ca8..0000000000 --- a/recipes/standardmoney.recipe +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -standard.money.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class StandardMoneyRo(BasicNewsRecipe): - title = 'Standard Money Ro' - __author__ = u'Silviu Cotoar\u0103' - publisher = 'Standard Money' - description = 'Portal de Business' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Stiri,Romania' - encoding = 'utf-8' - cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='h1', attrs={'class': 'post-title'} - ), dict(name='div', attrs={'class': 'content_post'}) - ] - - feeds = [ - (u'Actualitate', u'http://standard.money.ro/feed') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/stars_and_stripes.recipe b/recipes/stars_and_stripes.recipe deleted file mode 100644 index a587b47d24..0000000000 --- a/recipes/stars_and_stripes.recipe +++ /dev/null @@ -1,35 +0,0 @@ -''' Stars and Stripes - ''' - - -import re - -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class AdvancedUserRecipe1308791026(BasicNewsRecipe): - title = u'Stars and Stripes' - oldest_article = 3 - max_articles_per_feed = 100 - __author__ = 'adoucette' - description = 'The U.S. militarys independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East.' - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publisher = 'stripes.com' - category = 'news, US, world' - language = 'en' - publication_type = 'newsportal' - preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - keep_only_tags = [dict(name='div', attrs={'class': ['element article']})] - remove_tags_after = [dict(name='ul', attrs={'class': 'inline-bookmarks'})] - feeds = [ - (u'News', u'http://feeds.stripes.com/starsandstripes/news'), - (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'), - (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'), - (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'), - (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel') - ] diff --git a/recipes/starwars.recipe b/recipes/starwars.recipe deleted file mode 100644 index 56ccaf267b..0000000000 --- a/recipes/starwars.recipe +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- -from calibre.web.feeds.news import BasicNewsRecipe - - -class TheForce(BasicNewsRecipe): - title = u'The Force' - language = 'en' - __author__ = 'Krittika Goyal' - oldest_article = 1 # days - max_articles_per_feed = 25 - encoding = 'cp1252' - - remove_stylesheets = True - conversion_options = {'linearize_tables': True} - remove_tags_after = dict(name='div', attrs={'class': 'KonaBody'}) - keep_only_tags = dict( - name='td', attrs={'background': '/images/span/tile_story_bgtile.gif'}) - remove_tags = [ - dict(name='iframe'), - ] - - feeds = [ - ('The Force', - 'http://www.theforce.net/outnews/tfnrdf.xml'), - ] - - def preprocess_html(self, soup): - for tag in soup.findAll(name='i'): - if 'Remember to join the Star Wars Insider Facebook' in self.tag_to_string(tag): - for x in tag.findAllNext(): - x.extract() - tag.extract() - break - tag = soup.find(attrs={'class': 'articleoption'}) - if tag is not None: - tag = tag.findParent('table') - if tag is not None: - for x in tag.findAllNext(): - x.extract() - tag.extract() - - for img in soup.findAll('img', src=True): - a = img.findParent('a', href=True) - if a is None: - continue - url = a.get('href').split('?')[-1].partition('=')[-1] - if url: - img.extract() - a.name = 'img' - a['src'] = url - del a['href'] - img['src'] = url - return soup diff --git a/recipes/stnn.recipe b/recipes/stnn.recipe deleted file mode 100644 index 7749553441..0000000000 --- a/recipes/stnn.recipe +++ /dev/null @@ -1,54 +0,0 @@ - - -__license__ = 'GPL v3' -__copyright__ = '2010, Larry Chan ' -''' -Singtao STNN -''' -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class SingtaoSTNN(BasicNewsRecipe): - title = 'Singtao STNN' - __author__ = 'Larry Chan, larry1chan' - description = 'Chinese News' - oldest_article = 2 - max_articles_per_feed = 100 - simultaneous_downloads = 5 - no_stylesheets = True - use_embedded_content = False - encoding = 'gb2312' - publisher = 'Singtao STNN' - category = 'news, China, world' - language = 'zh' - publication_type = 'newsportal' - extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' # noqa - masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif' - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True - } - - remove_tags_before = dict(name='div', attrs={'class': ['page_box']}) - remove_tags_after = dict(name='div', attrs={'class': ['pagelist']}) - - keep_only_tags = [ - dict(name='div', attrs={'class': ['font_title clearfix']}), - dict(name='div', attrs={'id': ['content_zoom']}) - - ] - - remove_attributes = ['width', 'height', 'href'] - - # for a full list of rss check out [url]http://www.stnn.cc/rss/[/url] - - feeds = [(u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'), - (u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'), - (u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'), - (u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'), - (u'International', u'http://www.stnn.cc/rss/guoji/index.xml'), - (u'China', u'http://www.stnn.cc/rss/china/index.xml'), - (u'Opnion', u'http://www.stnn.cc/rss/fin_op/index.xml'), - (u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'), - (u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml') - - ] diff --git a/recipes/strategic_culture.recipe b/recipes/strategic_culture.recipe deleted file mode 100644 index df07a09bef..0000000000 --- a/recipes/strategic_culture.recipe +++ /dev/null @@ -1,88 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2012, Darko Miletic ' - -''' -www.strategic-culture.org -''' - -import time - -from calibre import strftime -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class StrategicCulture(BasicNewsRecipe): - title = 'Strategic Culture Foundation' - __author__ = 'Darko Miletic' - description = 'Online Journal' - publisher = 'Strategic Culture Foundation' - category = 'news, politics' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - language = 'en' - publication_type = 'newsportal' - masthead_url = 'http://www.strategic-culture.org/img/logo.jpg' - extra_css = ''' - body{font-family: Arial, sans-serif} - h1{font-family: "Times New Roman",Times,serif} - img{margin-bottom: 0.8em} - ''' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(name=['h1', 'p']), dict(name='div', attrs={'id': 'cke_pastebin'}) - ] - - remove_tags = [dict(name=['object', 'link', 'base', 'meta', 'iframe'])] - - feeds = [ - - (u'News', u'http://www.strategic-culture.org/blocks/news.html'), - (u'Politics', u'http://www.strategic-culture.org/rubrics/politics.html'), - (u'Economics', u'http://www.strategic-culture.org/rubrics/economics.html'), - (u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html'), - (u'Columnists', u'http://www.strategic-culture.org/rubrics/columnists.html') - ] - - def print_version(self, url): - return url.replace('-culture.org/news/', '-culture.org/pview/') - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - if feedurl.endswith('news.html'): - clname = 'sini14' - else: - clname = 'h22' - checker = [] - for item in soup.findAll('a', attrs={'class': clname}): - atag = item - url = atag['href'] - title = self.tag_to_string(atag) - description = '' - daypart = url.rpartition('/')[0] - mpart, sep, day = daypart.rpartition('/') - ypart, sep, month = mpart.rpartition('/') - year = ypart.rpartition('/')[2] - date = strftime("%a, %d %b %Y %H:%M:%S +0000", - time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y")) - if url not in checker: - checker.append(url) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/strategy-business.recipe b/recipes/strategy-business.recipe deleted file mode 100644 index ce89e108ae..0000000000 --- a/recipes/strategy-business.recipe +++ /dev/null @@ -1,104 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class StrategyBusinessRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = 'kwetal' - language = 'en' - version = 1 - - title = u'Strategy+Business' - publisher = u' Booz & Company' - category = u'Business' - description = (u'Business magazine for senior business executives and the people who influence them.' - 'Go to http://www.strategy-business.com/registration to sign up for a free account') - - oldest_article = 13 * 7 # 3 months - max_articles_per_feed = 100 - use_embedded_content = False - remove_empty_feeds = True - needs_subscription = True - - no_stylesheets = True - remove_javascript = True - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.strategy-business.com/registration') - for i, f in enumerate(br.forms()): - if 'gatekeeper_edit' in f.name: - br.select_form(name=f.name) - for c in f.controls: - if c.name.endswith('_email'): - br[c.name] = self.username - elif c.name.endswith('_password'): - br[c.name] = self.password - br.submit().read() - break - return br - - extra_css = ''' - body{font-family:verdana,arial,helvetica,geneva,sans-serif ;} - a {text-decoration: none; color: blue;} - h1 {margin: 0em; padding: 0em;} - h2 {font-size: medium; font-weight: bold;} - #sb-date {font-size: xx-small; color: #696969} - #category {font-style: italic; font-size: small; color: black; margin: 0em; padding: 0em;} - #byline {font-size: small; color: #666666} - div.profiles {font-size: small; font-style: italic; color: #696969} - div.profiles h2 {font-size: medium; font-style: normal; font-weight: bold; color: black} - ''' - - feeds = [] - feeds.append( - (u'Finance', u'http://feeds.feedburner.com/StrategyBusiness-Finance?format=xml')) - feeds.append((u'Global Perspective', - u'http://feeds.feedburner.com/StrategyBusiness-GlobalPerspective?format=xml')) - feeds.append( - (u'Innovation', u'http://feeds.feedburner.com/StrategyBusiness-Innovation?format=xml')) - feeds.append((u'Marketing And Sales', - u'http://feeds.feedburner.com/StrategyBusiness-MarketingAndSales?format=xml')) - feeds.append((u'Operations And Manufacturing', - u'http://feeds.feedburner.com/StrategyBusiness-OperationsAndManufacturing?format=xml')) - feeds.append((u'Organizations And People', - u'http://feeds.feedburner.com/StrategyBusiness-OrganizationsAndPeople?format=xml')) - feeds.append((u'Strategy And Leadership', - u'http://feeds.feedburner.com/StrategyBusiness-StrategyAndLeadership?format=xml')) - feeds.append((u'Sustainability', - u'http://feeds.feedburner.com/StrategyBusiness-Sustainability?format=xml')) - feeds.append((u'Auto, Airlines And Transport', - u'http://feeds.feedburner.com/StrategyBusiness-AutoAirlinesAndTransport?format=xml')) - feeds.append((u'Consumer Products', - u'http://feeds.feedburner.com/StrategyBusiness-ConsumerProducts?format=xml')) - feeds.append( - (u'Energy', u'http://feeds.feedburner.com/StrategyBusiness-Energy?format=xml')) - feeds.append( - (u'Health Care', u'http://feeds.feedburner.com/StrategyBusiness-HealthCare?format=xml')) - feeds.append( - (u'Technology', u'http://feeds.feedburner.com/StrategyBusiness-Technology?format=xml')) - feeds.append((u'Thought Leaders', - u'http://feeds.feedburner.com/StrategyBusiness-ThoughtLeaders?format=xml')) - feeds.append((u'Business Literature', - u'http://feeds.feedburner.com/StrategyBusiness-BusinessLiterature?format=xml')) - feeds.append((u'Recent Research', - u'http://feeds.feedburner.com/StrategyBusiness-RecentResearch?format=xml')) - - keep_only_tags = [ - dict(name='h1'), - dict(attrs={'class': ['introAndByline', 'content', 'resources']}), - ] - - remove_tags = [] - remove_tags.append(dict(name='img', attrs={'class': 'content1'})) - remove_tags.append( - dict(name='img', attrs={'src': '/media/image/end_of_story.gif'})) - remove_tags.append(dict(name='div', attrs={'class': [ - 'sb-adarea468', 'GigyaShare', 'moreBlogLinks', 'clearboth', 'GigyaCommentsContainer']})) - remove_tags.append(dict(name='div', attrs={'id': 'sb-paging'})) - remove_tags.append(dict(name='div', attrs={'id': 'textsize'})) - remove_tags.append( - dict(name='div', id=lambda x: x and x.startswith('div-gpt-ad-'))) - - def get_article_url(self, article): - url = BasicNewsRecipe.get_article_url(self, article) - return url.partition('?')[0] + '?pg=all' diff --git a/recipes/sueddeutsche_mobil.recipe b/recipes/sueddeutsche_mobil.recipe deleted file mode 100644 index 9516fdce31..0000000000 --- a/recipes/sueddeutsche_mobil.recipe +++ /dev/null @@ -1,133 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2012, 2013 Andreas Zeiser ' -''' -szmobil.sueddeutsche.de/ -''' -# History -# 2015.01.02 Daily Cover from https://zeitung.sueddeutsche.de/webapp by lala-rob (web@lala-rob.de) -# 2014.12.18 Fixing URL set Cover by lala-rob (web@lala-rob.de) -# 2014.10.06 Fixing Login URL and Article URL by lala-rob (web@lala-rob.de) -# -# 2013.01.09 Fixed bugs in article titles containing "strong" and -# other small changes -# 2012.08.04 Initial release - -import datetime -import re - -from calibre import strftime -from calibre.utils.date import utcnow -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class SZmobil(BasicNewsRecipe): - title = u'Süddeutsche Zeitung mobil' - __author__ = u'Andreas Zeiser' - description = u'Nachrichten aus Deutschland. Zugriff auf kostenpflichtiges Abo SZ mobil.' - publisher = u'Sueddeutsche Zeitung' - masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif' - language = u'de' - publication_type = u'newspaper' - category = u'news, politics, Germany' - cover_url = 'https://zeitung.sueddeutsche.de/szdigital/public/issue/previewimage?size=l&issueId=' + \ - (utcnow() + datetime.timedelta(hours=1) - ).strftime("%Y-%m-%d") + '&targetVersion=3&productId=sz' - no_stylesheets = True - oldest_article = 2 - encoding = 'iso-8859-1' - needs_subscription = True - remove_empty_feeds = True - delay = 1 - - # if you want to get rid of the date on the title page use - # timefmt = '' - timefmt = ' [%a, %d %b, %Y]' - - root_url = 'http://epaper.sueddeutsche.de/app/service/epaper-mobil/' - keep_only_tags = [dict(name='div', attrs={'class': 'article'})] - - def get_browser(self): - browser = BasicNewsRecipe.get_browser(self) - - # Login via fetching of Streiflicht -> Fill out login request - url = 'https://id.sueddeutsche.de/login' - browser.open(url) - - browser.select_form(nr=0) # to select the first form - browser['login'] = self.username - browser['password'] = self.password - browser.submit() - return browser - - def parse_index(self): - # find all sections - src = self.index_to_soup( - 'http://epaper.sueddeutsche.de/app/service/epaper-mobil/') - feeds = [] - for itt in src.findAll('a', href=True): - if itt['href'].startswith('section.php?section'): - feeds.append((itt.string[0:-2], itt['href'])) - - all_articles = [] - for feed in feeds: - feed_url = self.root_url + feed[1] - feed_title = feed[0] - - self.report_progress(0, ('Fetching feed') + ' %s...' % - (feed_title if feed_title else feed_url)) - - src = self.index_to_soup(feed_url) - articles = [] - shorttitles = dict() - for itt in src.findAll('a', href=True): - if itt['href'].startswith('article.php?id='): - article_url = itt['href'] - article_id = int( - re.search(r"id=(\d*)&etag=", itt['href']).group(1)) - - # first check if link is a special article in section - # "Meinungsseite" - if itt.find('strong') is not None: - article_name = itt.strong.string - if len(itt.contents) > 1: - shorttitles[article_id] = itt.contents[1] - - articles.append( - (article_name, article_url, article_id)) - continue - - # candidate for a general article - if itt.string is None: - article_name = '' - else: - article_name = itt.string - - if (article_name.find(" mehr") == 0): - # just another link ("mehr") to an article - continue - - if itt.get('id') is not None: - shorttitles[article_id] = article_name - else: - articles.append( - (article_name, article_url, article_id)) - - feed_articles = [] - for article_name, article_url, article_id in articles: - url = self.root_url + article_url - title = article_name - # if you want to get rid of date for each article use - # pubdate = strftime('') - pubdate = strftime('[%a, %d %b]') - description = '' - if shorttitles.get(article_id) is not None: - description = shorttitles[article_id] - # we do not want the flag ("Impressum") - if "HERAUSGEGEBEN VOM" in description: - continue - d = dict(title=title, url=url, date=pubdate, - description=description, content='') - feed_articles.append(d) - all_articles.append((feed_title, feed_articles)) - - return all_articles diff --git a/recipes/sueddeutschezeitung.recipe b/recipes/sueddeutschezeitung.recipe deleted file mode 100644 index 88125650af..0000000000 --- a/recipes/sueddeutschezeitung.recipe +++ /dev/null @@ -1,126 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -www.sueddeutsche.de/sz/ -''' -# History -# 2015.01.02 Daily Cover from https://zeitung.sueddeutsche.de/webapp by lala-rob (web@lala-rob.de) -# 2014.12.15 Set Cover by lala-rob(web@lala-rob.de) -# 2014.10.02 Fixed url Problem by lala-rob(web@lala-rob.de) - -import datetime - -from calibre import strftime -from calibre.utils.date import utcnow -from calibre.web.feeds.news import BasicNewsRecipe - - -class SueddeutcheZeitung(BasicNewsRecipe): - title = u'Süddeutsche Zeitung' - __author__ = 'Darko Miletic' - description = 'News from Germany. Access to paid content.' - publisher = u'Süddeutsche Zeitung' - category = 'news, politics, Germany' - no_stylesheets = True - oldest_article = 2 - encoding = 'iso-8859-1' - needs_subscription = True - remove_empty_feeds = True - delay = 1 - cover_url = 'https://zeitung.sueddeutsche.de/szdigital/public/issue/previewimage?size=l&issueId=' + \ - (utcnow() + datetime.timedelta(hours=1) - ).strftime("%Y-%m-%d") + '&targetVersion=3&productId=sz' - PREFIX = 'http://epaper.sueddeutsche.de' - INDEX = PREFIX + '/app/epaper/textversion/' - use_embedded_content = False - masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif' - language = 'de' - publication_type = 'newspaper' - extra_css = ' body{font-family: Arial,Helvetica,sans-serif} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - remove_attributes = ['height', 'width', 'style'] - - def get_browser(self): - browser = BasicNewsRecipe.get_browser(self) - - # Login via fetching of Streiflicht -> Fill out login request - url = 'https://id.sueddeutsche.de/login' - browser.open(url) - - browser.select_form(nr=0) # to select the first form - browser['login'] = self.username - browser['password'] = self.password - browser.submit() - - return browser - - remove_tags = [ - dict(attrs={'class': 'hidePrint'}), dict( - name=['link', 'object', 'embed', 'base', 'iframe', 'br']) - ] - keep_only_tags = [dict(attrs={'class': 'artikelBox'})] - remove_tags_before = dict(attrs={'class': 'artikelTitel'}) - remove_tags_after = dict(attrs={'class': 'author'}) - - feeds = [ - - (u'Politik', INDEX + 'Politik/'), - (u'Seite drei', INDEX + 'Seite+drei/'), - (u'Thema des Tages', INDEX + 'Thema+des+Tages/'), - (u'Meinungsseite', INDEX + 'Meinungsseite/'), - (u'Wissen', INDEX + 'Wissen/'), - (u'Panorama', INDEX + 'Panorama/'), - (u'Feuilleton', INDEX + 'Feuilleton/'), - (u'Medien', INDEX + 'Medien/'), - (u'Wirtschaft', INDEX + 'Wirtschaft/'), - (u'Sport', INDEX + 'Sport/'), - (u'Bayern', INDEX + 'Bayern/'), - (u'Muenchen', INDEX + 'M%FCnchen/'), - (u'Muenchen City', INDEX + 'M%FCnchen+City/'), - (u'Jetzt.de', INDEX + 'Jetzt.de/'), - (u'Reise', INDEX + 'Reise/'), - (u'SZ Extra', INDEX + 'SZ+Extra/'), - (u'Wochenende', INDEX + 'SZ+am+Wochenende/'), - (u'Stellen-Markt', INDEX + 'Stellen-Markt/'), - (u'Motormarkt', INDEX + 'Motormarkt/'), - (u'Immobilien-Markt', INDEX + 'Immobilien-Markt/'), - (u'Thema', INDEX + 'Thema/'), - (u'Forum', INDEX + 'Forum/'), - (u'Leute', INDEX + 'Leute/'), - (u'Jugend', INDEX + 'Jugend/'), - (u'Beilage', INDEX + 'Beilage/') - ] - - def parse_index(self): - src = self.index_to_soup(self.INDEX) - id = '' - for itt in src.findAll('a', href=True): - if itt['href'].startswith('/app/epaper/textversion/inhalt/'): - id = itt['href'].rpartition('/inhalt/')[2] - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, ('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl + id) - tbl = soup.find(attrs={'class': 'szprintd'}) - for item in tbl.findAll(name='td', attrs={'class': 'topthema'}): - atag = item.find(attrs={'class': 'Titel'}).a - ptag = item.find('p') - stag = ptag.find('script') - if stag: - stag.extract() - url = self.PREFIX + atag['href'] - title = self.tag_to_string(atag) - description = self.tag_to_string(ptag) - articles.append({ - 'title': title, 'date': strftime(self.timefmt), 'url': url, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/superbebe.recipe b/recipes/superbebe.recipe deleted file mode 100644 index 491cad033a..0000000000 --- a/recipes/superbebe.recipe +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = u'2011, Silviu Cotoar\u0103' -''' -superbebe.ro -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Superbebe(BasicNewsRecipe): - title = u'Superbebe' - __author__ = u'Silviu Cotoar\u0103' - description = 'Superbebe' - publisher = 'Superbebe' - oldest_article = 5 - language = 'ro' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - category = 'Ziare,Reviste,Bebe,Mamici' - encoding = 'utf-8' - cover_url = 'http://www.superbebe.ro/images/superbebe.gif' - - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } - - keep_only_tags = [ - dict(name='div', attrs={'class': 'articol'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class': ['info']}), dict( - name='div', attrs={'class': ['tags']}) - ] - - remove_tags_after = [ - dict(name='div', attrs={'class': ['tags']}) - ] - - feeds = [ - (u'Feeds', u'http://www.superbebe.ro/rss') - ] - - def preprocess_html(self, soup): - return self.adeify_images(soup) diff --git a/recipes/superesportes.recipe b/recipes/superesportes.recipe deleted file mode 100644 index d2f24d650a..0000000000 --- a/recipes/superesportes.recipe +++ /dev/null @@ -1,76 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Luciano Furtado ' -''' -www.superesportes.com.br -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class SuperEsportesRecipe(BasicNewsRecipe): - - title = u'www.superesportes.com.br' - description = u'Superesportes - Not√≠cias do esporte no Brasil e no mundo' - __author__ = 'Luciano Furtado' - language = 'pt' - category = 'esportes, Brasil' - no_stylesheets = True - oldest_article = 7 - - use_embedded_content = 0 - max_articles_per_feed = 10 - cover_url = 'http://imgs.mg.superesportes.com.br/superesportes_logo.png' - - extra_css = 'div.info_noticias h1 { font-size: 100% }' - - remove_tags = [ - dict(name='div', attrs={'class': 'topo'}), - dict(name='div', attrs={'class': 'rodape'}), - dict(name='div', attrs={'class': 'navegacao'}), - dict(name='div', attrs={'class': 'lateral2'}), - dict(name='div', attrs={'class': 'leia_mais'}), - dict(name='div', attrs={'id': 'comentar'}), - dict(name='div', attrs={'id': 'vrumelc_noticia'}), - dict(name='div', attrs={'class': 'compartilhe'}), - dict(name='div', attrs={'class': 'linha_noticias'}), - dict(name='div', attrs={'class': 'botoes_noticias'}), - dict(name='div', attrs={'class': 'barra_time bg_time'}), - ] - - def parse_index(self): - feeds = [] - sections = [ - (u'Atletico', 'http://www.df.superesportes.com.br/futebol/atletico-mg/capa_atletico_mg/index.shtml'), - (u'Botafogo', 'http://www.df.superesportes.com.br/futebol/botafogo/capa_botafogo/index.shtml'), - (u'Corinthinas', 'http://www.df.superesportes.com.br/futebol/corinthians/capa_corinthians/index.shtml'), - (u'Cruzeiro', 'http://www.df.superesportes.com.br/futebol/cruzeiro/capa_cruzeiro/index.shtml'), - (u'Flamengo', 'http://www.df.superesportes.com.br/futebol/flamengo/capa_flamengo/index.shtml'), - (u'Fluminense', 'http://www.df.superesportes.com.br/futebol/fluminense/capa_fluminense/index.shtml'), - (u'Palmeiras', 'http://www.df.superesportes.com.br/futebol/palmeiras/capa_palmeiras/index.shtml'), - (u'Santos', 'http://www.df.superesportes.com.br/futebol/santos/capa_santos/index.shtml'), - (u'S√£o Paulo', 'http://www.df.superesportes.com.br/futebol/sao-paulo/capa_sao_paulo/index.shtml'), - (u'Vasco', 'http://www.df.superesportes.com.br/futebol/vasco/capa_vasco/index.shtml'), - ] - - for section, url in sections: - current_articles = [] - - soup = self.index_to_soup(url) - latestNews = soup.find( - name='ul', attrs={'class': 'lista_ultimas_noticias'}) - - for li_tag in latestNews.findAll(name='li'): - a_tag = li_tag.find('a', href=True) - if a_tag is None: - continue - title = self.tag_to_string(a_tag) - url = a_tag.get('href', False) - self.log("\n\nFound title: " + title + - "\nUrl: " + url + "\nSection: " + section) - current_articles.append( - {'title': title, 'url': url, 'description': title, 'date': ''}) - - if current_articles: - feeds.append((section, current_articles)) - - return feeds diff --git a/recipes/swarajya.recipe b/recipes/swarajya.recipe index 76fd237026..48660cecb2 100644 --- a/recipes/swarajya.recipe +++ b/recipes/swarajya.recipe @@ -1,51 +1,91 @@ -from calibre.web.feeds.news import BasicNewsRecipe, classes +#!/usr/bin/env python +import re +import json + +from calibre.web.feeds.news import BasicNewsRecipe + + +def absurl(url): + if url.startswith('/'): + return 'https://swarajyamag.com' + url + return url + + +html_entities = {'"': '"', ''': "'", '<': '<', '>': '>', '&': '&'} class SwarajyaMag(BasicNewsRecipe): - title = u'Swarajya Magazine' + title = 'Swarajya Magazine' __author__ = 'unkn0wn' description = 'Swarajya - a big tent for liberal right of centre discourse that reaches out, engages and caters to the new India.' language = 'en_IN' - no_stylesheets = True remove_javascript = True use_embedded_content = False - remove_attributes = ['height', 'width', 'style'] encoding = 'utf-8' - keep_only_tags = [ - dict(name='article') - ] - - remove_tags = [ - dict(name=['svg', 'button', 'source']), - classes('swarajya_patron_block hs-tooltip-content hidden'), - ] + recipe_specific_options = { + 'issue': { + 'short': 'The edition URL ', + } + } def preprocess_html(self, soup): - for span in soup.findAll('span'): - if self.tag_to_string(span).strip() == 'Tags': - div = span.findParent('div') - if div: - div.extract() + for img in soup.findAll('img', attrs={'src': True}): + img['src'] = img['src'].split('?')[0] + '?w=600' return soup def parse_index(self): - soup = self.index_to_soup('https://swarajyamag.com/all-issues') - a = soup.find('a', href=lambda x: x and x.startswith('https://swarajyamag.com/issue/')) - url = a['href'] + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + url = d + else: + soup = self.index_to_soup('https://swarajyamag.com/all-issues') + a = soup.find('a', href=lambda x: x and x.startswith('/issue/')) + url = absurl(a['href']) self.log('Downloading issue:', url) - self.cover_url = a.img['src'] + soup = self.index_to_soup(url) ans = [] - for div in soup.findAll('div', attrs={'class':'rounded'}): + cont = soup.find(attrs={'id': 'container'}) + self.cover_url = ( + cont.find('a', href=lambda x: x and x.startswith('/issue/')) + .img['src'] + .split('?')[0] + + '?w=600' + ) + for div in cont.findAll('div', attrs={'class': 'rounded'}): url = div.findParent('a')['href'] if url.startswith('/'): url = 'https://swarajyamag.com' + url h4 = div.find('h4') title = self.tag_to_string(h4) - d = h4.next_sibling + d = h4.next_sibling.div desc = 'By ' + self.tag_to_string(d).strip() self.log(title, ' at ', url, '\n', desc) ans.append({'title': title, 'url': url, 'description': desc}) return [('Articles', ans)] + + def preprocess_raw_html(self, raw, url): + app = re.search( + r'