from calibre.web.feeds.news import BasicNewsRecipe
import re


class SmeRecipe(BasicNewsRecipe):
    """Calibre news recipe for SME ('News from Slovakia').

    Every setting is a plain class attribute read by ``BasicNewsRecipe``;
    the only custom logic is :meth:`print_version`, which maps an article
    URL onto a print-page URL.
    """

    # --- Recipe metadata ---------------------------------------------------
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'sk'
    version = 1

    title = u'SME'
    publisher = u''
    category = u'News, Newspaper'
    description = u'News from Slovakia'

    # --- Fetch options (semantics are defined by BasicNewsRecipe) ----------
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    # Feeds from: http://rss.sme.sk/
    # Each entry is a (section title, RSS URL) pair; order is significant.
    feeds = [
        (u'Tituln\u00E1 strana', u'http://rss.sme.sk/rss/rss.asp?id=frontpage'),
        (u'Naj\u010D\u00EDtanej\u0161ie za 4 hodiny', u'http://rss.sme.sk/rss/rss.asp?id=smenajcit4'),
        (u'Naj\u010D\u00EDtanej\u0161ie za 24 hod\u00EDn', u'http://rss.sme.sk/rss/rss.asp?id=smenajcit24'),
        (u'Z domova', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zdom'),
        (u'Zahrani\u010Die', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zahr'),
        (u'Z domova + zahrani\u010Die', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline'),
        (u'Ekonomika', u'http://rss.sme.sk/rss/rss.asp?sek=ekon'),
        (u'Kult\u00FAra', u'http://rss.sme.sk/rss/rss.asp?sek=kult'),
        (u'Koment\u00E1re', u'http://rss.sme.sk/rss/rss.asp?sek=koment'),
        (u'Volby', u'http://rss.sme.sk/rss/rss.asp?sek=eVolby'),
        # Disabled feed, kept for reference:
        # (u'Press foto', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_foto'),
        (u'\u0160port', u'http://rss.sme.sk/rss/rss.asp?sek=sport'),
        (u'Futbal', u'http://rss.sme.sk/rss/rss.asp?sek=futbal'),
        (u'Hokej', u'http://rss.sme.sk/rss/rss.asp?sek=hokej'),
        (u'Po\u010D\u00EDta\u010De', u'http://rss.sme.sk/rss/rss.asp?sek=pocit'),
        (u'Mobil', u'http://rss.sme.sk/rss/rss.asp?sek=mobil'),
        (u'Veda', u'http://rss.sme.sk/rss/rss.asp?sek=veda'),
        (u'Natankuj', u'http://rss.sme.sk/rss/rss.asp?sek=natankuj'),
        (u'Auto', u'http://rss.sme.sk/rss/rss.asp?sek=auto'),
        (u'Dom\u00E1cnos\u0165', u'http://rss.sme.sk/rss/rss.asp?sek=domac'),
        (u'\u017Dena', u'http://rss.sme.sk/rss/rss.asp?sek=zena'),
        (u'Z\u00E1bava', u'http://rss.sme.sk/rss/rss.asp?sek=zabava'),
        (u'Hry', u'http://rss.sme.sk/rss/rss.asp?sek=hry'),
    ]

    # --- Tag filters (matching rules are applied by BasicNewsRecipe) -------
    keep_only_tags = [
        dict(name='div', attrs={'id': 'contenth'}),
        dict(name='div', attrs={'class': 'articlec col'}),
    ]

    # Any <div> whose id matches the 'smeplayer.*' pattern.
    remove_tags = [
        dict(name='div', attrs={'id': re.compile('smeplayer.*')}),
    ]

    remove_tags_after = [dict(name='p', attrs={'class': 'autor_line'})]

    # Declares two font faces backed by files under /opt/sony/ebook/FONT and
    # makes them the body font family.
    extra_css = '''
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
        body {font-family: sans1, serif1;}
    '''

    def print_version(self, url):
        """Return the print-page URL for the article at *url*.

        The fifth ``/``-separated component of *url* (index 4) is taken as
        the article identifier and appended to the fixed
        ``clanok_tlac.asp?cl=`` address.

        Raises:
            IndexError: if *url* has fewer than five ``/``-separated parts.
        """
        article_id = url.split('/')[4]
        return u'http://korzar.sme.sk/clanok_tlac.asp?cl=' + str(article_id)