__license__ = 'GPL v3'
__copyright__ = '2013, Darko Miletic '
''' libartes.com '''

import re

try:
    from urllib.parse import urljoin
except ImportError:  # Python 2 interpreter
    from urlparse import urljoin

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Libartes(BasicNewsRecipe):
    """Recipe for the Libartes magazine (libartes.com).

    The article list is not taken from a feed: ``parse_index`` downloads the
    page at ``INDEX`` and turns every ``<a class="belad">`` link found there
    into one article of a single section.
    """

    title = 'Libartes'
    __author__ = 'Darko Miletic'
    description = 'Elektronski časopis Libartes delo je kulturnih entuzijasta, umetnika i teoretičara umetnosti i književnosti. Časopis Libartes izlazi tromesečno i bavi se različitim granama umetnosti - književnošću, muzikom, filmom, likovnim umetnostima, dizajnom i arhitekturom.'
    publisher = 'Libartes'
    category = 'literatura, knjizevnost, film, dizajn, arhitektura, muzika'
    no_stylesheets = True
    # Page scraped by parse_index(); also the base for resolving article links.
    INDEX = 'http://libartes.com/'
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://libartes.com/index_files/logo.gif'
    # Built from adjacent literals purely for readability; the resulting
    # string is a single line of space-separated CSS rules.
    extra_css = (
        ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: "Times New Roman",Times,serif1, serif}'
        ' img{display:block}'
        ' .naslov{font-size: xx-large; font-weight: bold}'
        ' .nag{font-size: large; font-weight: bold} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Replace U+0110 (LATIN CAPITAL LETTER D WITH STROKE) with U+00D0
    # (LATIN CAPITAL LETTER ETH) in the downloaded markup.
    # NOTE(review): presumably a workaround for reader fonts that lack
    # U+0110 -- confirm before removing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags_before = dict(attrs={'id': 'nav'})
    remove_tags_after = dict(attrs={'id': 'fb'})
    keep_only_tags = [dict(name='div', attrs={'id': 'center_content'})]
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'embed', 'meta']),
        dict(attrs={'id': 'nav'}),
    ]

    def parse_index(self):
        """Build the article list from the links on the ``INDEX`` page.

        Every ``<a class="belad" href=...>`` element on the page becomes one
        article. Its ``href`` is resolved against ``INDEX`` with ``urljoin``,
        so relative, root-relative and already-absolute links all produce a
        well-formed URL.

        Returns:
            A list holding one ``(section title, articles)`` tuple, where
            ``articles`` is a list of dicts with the keys ``title``,
            ``date``, ``url`` and ``description``.
        """
        soup = self.index_to_soup(self.INDEX)
        # The same formatted date is used for every article, so compute it
        # once instead of once per link.
        date = strftime(self.timefmt)
        articles = [
            {
                'title': self.tag_to_string(link),
                'date': date,
                'url': urljoin(self.INDEX, link['href']),
                'description': '',
            }
            for link in soup.findAll(name='a', attrs={'class': 'belad'}, href=True)
        ]
        return [('Casopis Libartes', articles)]