__license__ = 'GPL v3'
__copyright__ = '2008-2012, Darko Miletic '
'''
b92.net
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class B92(BasicNewsRecipe):
    """calibre news recipe that builds an e-book from the b92.net RSS feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'B92'
    __author__ = 'Darko Miletic'
    description = (
        'Najnovije vesti iz Srbije, regiona i sveta, aktuelne teme iz sveta '
        'politike, ekonomije, drustva, foto galerija, kolumne'
    )
    publisher = 'B92'
    category = 'news, politics, Serbia'
    language = 'sr'
    publication_type = 'newsportal'
    masthead_url = 'http://b92s.net/v4/img/new-logo.png'

    # --- Fetch settings -------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1250'

    # Stylesheet applied to the generated book. Built from adjacent literals
    # so the value stays a single line of CSS.
    extra_css = (
        ' @font-face {font-family: "sans1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: Arial,Helvetica,sans1,sans-serif}'
        ' .article-info2,.article-info1'
        '{text-transform: uppercase; font-size: small}'
        ' img{display: block}'
        ' .sms{font-weight: bold} '
    )

    # Conversion metadata mirrors the class-level attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True
    }

    # (compiled pattern, replacement callable) pairs.
    preprocess_regexps = [
        # Swap U+0110 (capital D with stroke) for U+00D0 (capital Eth).
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
        # Replace everything from the opening <html up to <body> with a
        # minimal, well-formed head.
        # NOTE(review): an empty pattern in this slot would match at every
        # position and splice the replacement text between all characters of
        # the page. The tag-delimited pattern and the markup around
        # 'something' are assumed to be the intended rule -- confirm against
        # the upstream calibre recipe.
        (re.compile(r'<html.*?<body>', re.DOTALL | re.IGNORECASE),
         lambda match: '<html><head><title>something</title></head><body>')
    ]

    # Only elements carrying one of these classes are kept.
    keep_only_tags = [dict(attrs={'class': ['article-info1', 'article-text']})]

    # Attributes stripped from every tag.
    remove_attributes = [
        'width', 'height', 'align', 'hspace', 'vspace', 'border', 'lang',
        'xmlns:fb'
    ]

    # Elements removed outright: by tag name, and the block with id="social".
    remove_tags = [
        dict(name=['embed', 'link', 'base', 'meta', 'iframe']),
        dict(attrs={'id': 'social'})
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml'),
        (u'Biz', u'http://www.b92.net/info/rss/biz.xml'),
        (u'Sport', u'http://www.b92.net/info/rss/sport.xml'),
        (u'Zivot', u'http://www.b92.net/info/rss/zivot.xml'),
        (u'Kultura', u'http://www.b92.net/info/rss/kultura.xml'),
        (u'Automobili', u'http://www.b92.net/info/rss/automobili.xml'),
        (u'Tehnopolis', u'http://www.b92.net/info/rss/tehnopolis.xml')
    ]

    def preprocess_html(self, soup):
        """Drop inline styles and replace text-only links with their text.

        Every element that has a ``style`` attribute loses it. Every ``<a>``
        whose ``.string`` is not ``None`` is replaced in the tree by that
        string; other anchors are left untouched.

        :param soup: parsed article document (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for alink in soup.findAll('a'):
            link_text = alink.string
            if link_text is not None:
                alink.replaceWith(link_text)
        return soup