__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
dw-world.de
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class DeutscheWelle_sr(BasicNewsRecipe):
    """Recipe for the Serbian-language Deutsche Welle news feeds.

    Articles are fetched through the site's print-content popup (see
    ``print_version``) and every hyperlink is flattened before conversion
    (see ``preprocess_html``).
    """

    title = 'Deutsche Welle'
    __author__ = 'Darko Miletic'
    description = 'Vesti iz Nemacke i sveta'
    publisher = 'Deutsche Welle'
    category = 'news, politics, Germany'
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    language = 'sr'
    publication_type = 'newsportal'
    remove_empty_feeds = True
    masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Arial,sans1,sans-serif}
        img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
        .caption{font-size: x-small; display: block; margin-bottom: 0.4em}
    """

    # Replace U+0110 (Latin capital D with stroke) with U+00D0 (Latin capital
    # Eth) in the downloaded markup.
    # NOTE(review): presumably a workaround for a glyph missing from the
    # reader font referenced in extra_css -- confirm before removing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [
        dict(name=['iframe', 'embed', 'object', 'form', 'base', 'meta', 'link']),
        dict(attrs={'class': 'actionFooter'}),
    ]
    keep_only_tags = [dict(attrs={'class': 'ArticleDetail detail'})]
    remove_attributes = ['height', 'width', 'onclick', 'border', 'lang']

    feeds = [
        (u'Politika', u'http://rss.dw-world.de/rdf/rss-ser-pol'),
        (u'Srbija', u'http://rss.dw-world.de/rdf/rss-ser-pol-ser'),
        (u'Region', u'http://rss.dw-world.de/rdf/rss-ser-pol-region'),
        (u'Evropa', u'http://rss.dw-world.de/rdf/rss-ser-pol-eu'),
        (u'Nemacka', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'),
        # NOTE(review): the next two feeds reuse the 'Nemacka' URL, which
        # looks like a copy/paste slip -- confirm the intended feed URLs.
        (u'Svet', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'),
        (u'Pregled stampe', u'http://rss.dw-world.de/rdf/rss-ser-pol-ger'),
        (u'Nauka Tehnika Medicina', u'http://rss.dw-world.de/rdf/rss-ser-science'),
        # Plain http:// URL like every other entry; the former 'feed:' prefix
        # is a browser pseudo-scheme, not something an HTTP client can fetch.
        (u'Kultura', u'http://rss.dw-world.de/rdf/rss-ser-cul'),
    ]

    def print_version(self, url):
        """Return the print-popup URL for the article at ``url``.

        Everything after the last ``/`` of ``url`` is appended to the site's
        ``popup_printcontent`` endpoint.
        """
        article_id = url.rpartition('/')[2]
        return 'http://www.dw-world.de/popups/popup_printcontent/' + article_id

    def preprocess_html(self, soup):
        """Flatten every ``<a>`` element in ``soup`` and return ``soup``.

        * An anchor with a single string child is replaced by that string.
        * Otherwise, an anchor containing an ``<img>`` is turned into a
          ``<div>`` with its ``href`` and ``target`` attributes removed.
        * Any other anchor is replaced by its text content.
        """
        for anchor in soup.findAll('a'):
            image = anchor.find('img')
            link_text = anchor.string
            if link_text is not None:
                anchor.replaceWith(link_text)
            elif image:
                # Keep the element (and its image) but make it a plain,
                # non-link container.
                anchor.name = 'div'
                del anchor['href']
                # 'target' is assigned before being deleted, as in the
                # original code, so the attribute is known to exist.
                anchor['target'] = ''
                del anchor['target']
            else:
                anchor.replaceWith(self.tag_to_string(anchor))
        return soup