From 2346be7553a001f68371d9c67aed460d7fd9663c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 23 Dec 2009 17:21:59 -0700
Subject: [PATCH] New recipes for SME and Pravda by kwetal

---
 resources/recipes/pravda.recipe | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 resources/recipes/sme.recipe    | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 resources/recipes/pravda.recipe
 create mode 100644 resources/recipes/sme.recipe

Note: this patch was edited after export (line structure restored, feed
title and print_version clean-ups, docstrings added). The blob ids on the
"index" lines below still refer to the content as originally exported.

diff --git a/resources/recipes/pravda.recipe b/resources/recipes/pravda.recipe
new file mode 100644
index 0000000000..783e3be06c
--- /dev/null
+++ b/resources/recipes/pravda.recipe
@@ -0,0 +1,68 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class PravdaSlovakiaRecipe(BasicNewsRecipe):
+    '''Calibre news recipe for Pravda (news from Slovakia), fed by its RSS feeds.'''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'sk'
+    version = 1
+
+    title = u'Pravda'
+    publisher = u''
+    category = u'News, Newspaper'
+    description = u'News from Slovakia'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Feeds from: http://spravy.pravda.sk/info.asp?y=sk_kskop/rssinfo.htm
+    feeds = []
+    feeds.append((u'Spravodajstvo', u'http://servis.pravda.sk/rss.asp'))
+    feeds.append((u'N\u00E1zory', u'http://servis.pravda.sk/rss.asp?o=sk_nazory'))
+    feeds.append((u'\u0160port', u'http://servis.pravda.sk/rss.asp?o=sk_sport'))
+    feeds.append((u'Peniaze', u'http://servis.pravda.sk/rss.asp?o=sk_peniaze'))
+    feeds.append((u'Koktail', u'http://servis.pravda.sk/rss.asp?o=sk_koktail'))
+    feeds.append((u'Kult\u00FAra', u'http://servis.pravda.sk/rss.asp?o=sk_kultura'))
+    feeds.append((u'B\u00FDvanie', u'http://servis.pravda.sk/rss.asp?o=sk_byvanie'))
+    feeds.append((u'Veda a Technika', u'http://servis.pravda.sk/rss.asp?o=sk_veda'))
+    feeds.append((u'Mozgov\u0148a', u'http://servis.pravda.sk/rss.asp?o=sk_mozgovna'))
+    feeds.append((u'Auto', u'http://servis.pravda.sk/rss.asp?o=sk_autoweb'))
+    feeds.append((u'Cestovanie', u'http://servis.pravda.sk/rss.asp?o=sk_cestovanie'))
+    feeds.append((u'Regi\u00F3ny', u'http://servis.pravda.sk/rss.asp?r=sk_regiony'))
+    feeds.append((u'Profesia', u'http://servis.pravda.sk/rss.asp?o=sk_profesia'))
+    feeds.append((u'Zdravie', u'http://servis.pravda.sk/rss.asp?o=sk_zdravie'))
+    feeds.append((u'\u010C\u00EDtajme de\u0165om', u'http://servis.pravda.sk/rss.asp?o=sk_citajme'))
+
+    # Tags stripped from every downloaded page.
+    remove_tags = []
+    remove_tags.append(dict(name = 'p', attrs = {'class': 'spatNaClanok'}))
+    remove_tags.append(dict(name = 'ul'))
+
+    extra_css = '''
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
+        body {font-family: sans1, serif1;}
+        .art-info {font-size: x-small; color: #696969; margin-bottom: 0.3em;}
+        .img-info {font-size: x-small; color: #696969;}
+    '''
+
+    def print_version(self, url):
+        '''Return the print-page URL for the article at ``url``.
+
+        The text after the last '/' in ``url`` is split at its last '?' into
+        a page name and a query string. The result points at tlac.asp with
+        the page name (minus '.asp') as the ``r`` parameter, followed by the
+        original query string.
+        '''
+        # NOTE(review): the host is always sport.pravda.sk, whatever section
+        # the article came from -- confirm tlac.asp serves every section there.
+        page, _, query = url.rpartition('/')[2].rpartition('?')
+        page = page.replace('.asp', '')
+
+        return 'http://sport.pravda.sk/tlac.asp?r=' + page + '&' + query
diff --git a/resources/recipes/sme.recipe b/resources/recipes/sme.recipe
new file mode 100644
index 0000000000..44cbdde35b
--- /dev/null
+++ b/resources/recipes/sme.recipe
@@ -0,0 +1,77 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class SmeRecipe(BasicNewsRecipe):
+    '''Calibre news recipe for SME (news from Slovakia), fed by its RSS feeds.'''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'sk'
+    version = 1
+
+    title = u'SME'
+    publisher = u''
+    category = u'News, Newspaper'
+    description = u'News from Slovakia'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Feeds from: http://rss.sme.sk/
+    feeds = []
+    feeds.append((u'Tituln\u00E1 strana', u'http://rss.sme.sk/rss/rss.asp?id=frontpage'))
+    feeds.append((u'Naj\u010D\u00EDtanej\u0161ie za 4 hodiny', u'http://rss.sme.sk/rss/rss.asp?id=smenajcit4'))
+    feeds.append((u'Naj\u010D\u00EDtanej\u0161ie za 24 hod\u00EDn', u'http://rss.sme.sk/rss/rss.asp?id=smenajcit24'))
+    feeds.append((u'Z domova', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zdom'))
+    feeds.append((u'Zahrani\u010Die', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zahr'))
+    feeds.append((u'Z domova + zahrani\u010Die', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline'))
+    feeds.append((u'Ekonomika', u'http://rss.sme.sk/rss/rss.asp?sek=ekon'))
+    feeds.append((u'Kult\u00FAra', u'http://rss.sme.sk/rss/rss.asp?sek=kult'))
+    feeds.append((u'Koment\u00E1re', u'http://rss.sme.sk/rss/rss.asp?sek=koment'))
+    feeds.append((u'Volby', u'http://rss.sme.sk/rss/rss.asp?sek=eVolby'))
+    #feeds.append((u'Press foto', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_foto'))
+    feeds.append((u'\u0160port', u'http://rss.sme.sk/rss/rss.asp?sek=sport'))
+    feeds.append((u'Futbal', u'http://rss.sme.sk/rss/rss.asp?sek=futbal'))
+    feeds.append((u'Hokej', u'http://rss.sme.sk/rss/rss.asp?sek=hokej'))
+    feeds.append((u'Po\u010D\u00EDta\u010De', u'http://rss.sme.sk/rss/rss.asp?sek=pocit'))
+    feeds.append((u'Mobil', u'http://rss.sme.sk/rss/rss.asp?sek=mobil'))
+    feeds.append((u'Veda', u'http://rss.sme.sk/rss/rss.asp?sek=veda'))
+    feeds.append((u'Natankuj', u'http://rss.sme.sk/rss/rss.asp?sek=natankuj'))
+    feeds.append((u'Auto', u'http://rss.sme.sk/rss/rss.asp?sek=auto'))
+    feeds.append((u'Dom\u00E1cnos\u0165', u'http://rss.sme.sk/rss/rss.asp?sek=domac'))
+    feeds.append((u'\u017Dena', u'http://rss.sme.sk/rss/rss.asp?sek=zena'))
+    feeds.append((u'Z\u00E1bava', u'http://rss.sme.sk/rss/rss.asp?sek=zabava'))
+    feeds.append((u'Hry', u'http://rss.sme.sk/rss/rss.asp?sek=hry'))
+    #feeds.append((u'', u''))
+
+    keep_only_tags = []
+    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'contenth'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'articlec col'}))
+
+    remove_tags = []
+    remove_tags.append(dict(name = 'div', attrs = {'id': re.compile(r'smeplayer.*')}))
+
+    remove_tags_after = [dict(name = 'p', attrs = {'class': 'autor_line'})]
+
+    extra_css = '''
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
+        body {font-family: sans1, serif1;}
+    '''
+
+    def print_version(self, url):
+        '''Return the print-page URL for the article at ``url``.
+
+        The article id is taken from the fifth '/'-separated piece of ``url``
+        (index 4) and passed to clanok_tlac.asp as the ``cl`` parameter.
+        '''
+        # NOTE(review): the host is always korzar.sme.sk -- confirm
+        # clanok_tlac.asp there serves articles from every section.
+        article_id = url.split('/')[4]
+
+        return u'http://korzar.sme.sk/clanok_tlac.asp?cl=' + article_id