__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic '
'''
nspm.rs
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString


class Nspm(BasicNewsRecipe):
    """Calibre news recipe for nspm.rs (Nova srpska politicka misao).

    Articles are collected from the RSS feeds listed in ``feeds``; each page
    is reduced to the element with id ``jsn-mainbody`` and cleaned up in
    ``preprocess_html``.
    """

    title = 'Nova srpska politicka misao'
    __author__ = 'Darko Miletic'
    description = 'Casopis za politicku teoriju i drustvena istrazivanja'
    publisher = 'NSPM'
    category = 'news, politics, Serbia'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    # Page opened once by get_browser() before any article is downloaded.
    INDEX = 'http://www.nspm.rs/?alphabet=l'
    encoding = 'utf-8'
    language = 'sr'
    delay = 2
    remove_empty_feeds = True
    publication_type = 'magazine'
    masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'

    # Stylesheet injected into the generated book. Built from adjacent
    # literals (one CSS rule each) so the resulting string stays unchanged.
    extra_css = (
        ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: "Times New Roman", serif1, serif}'
        ' .article_description{font-family: Arial, sans1, sans-serif}'
        ' img{margin-top:0.5em; margin-bottom: 0.7em}'
        ' .author{color: #990000; font-weight: bold}'
        ' .author,.createdate{font-size: 0.9em} '
    )

    # Metadata and options handed to the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Replace every U+0110 with U+00D0 in the downloaded markup.
    # NOTE(review): presumably a glyph-availability workaround for the target
    # device fonts -- confirm before changing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(attrs={'id': 'jsn-mainbody'})]
    remove_tags = [
        dict(name=['link', 'object', 'embed', 'script', 'meta', 'base', 'iframe']),
        dict(attrs={'class': 'buttonheading'}),
    ]
    remove_tags_before = dict(attrs={'class': 'contentheading'})
    remove_tags_after = dict(attrs={'class': 'article_separator'})
    remove_attributes = ['width', 'height']

    def get_browser(self):
        """Return the base-class browser after it has opened ``INDEX`` once.

        NOTE(review): the initial request presumably lets the site record the
        ``alphabet=l`` choice for the session -- confirm against the site.
        """
        # The base implementation is an instance method, so ``self`` must be
        # passed explicitly when it is called through the class.
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        return br

    feeds = [
        (u'Rubrike', u'http://www.nspm.rs/rubrike/feed/rss.html'),
        (u'Debate', u'http://www.nspm.rs/debate/feed/rss.html'),
        (u'Reci i misli', u'http://www.nspm.rs/reci-i-misli/feed/rss.html'),
        (u'Samo smeh srbina spasava', u'http://www.nspm.rs/samo-smeh-srbina-spasava/feed/rss.html'),
        (u'Polemike', u'http://www.nspm.rs/polemike/feed/rss.html'),
        (u'Prikazi', u'http://www.nspm.rs/prikazi/feed/rss.html'),
        (u'Prenosimo', u'http://www.nspm.rs/prenosimo/feed/rss.html'),
        (u'Hronika', u'http://www.nspm.rs/tabela/hronika/feed/rss.html'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles and unlink ``<h1>`` titles.

        Every ``style`` attribute under ``<body>`` is deleted. For each
        ``<h1>`` whose first ``<a>`` has a single string child, the anchor is
        removed and its text is inserted as the heading's first child.
        Headings without such an anchor are left untouched.

        Returns the result of ``self.adeify_images(soup)``.
        """
        for styled in soup.body.findAll(style=True):
            del styled['style']
        for heading in soup.body.findAll('h1'):
            link = heading.a
            # Skip headings that carry no usable link text instead of
            # failing on a missing anchor or a None string.
            if link is None or link.string is None:
                continue
            title_text = NavigableString(link.string)
            link.extract()
            heading.insert(0, title_text)
        return self.adeify_images(soup)