From 4b670c5f306e0b4caf7a4865474cbfe0a7dae506 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 11 Feb 2010 09:39:14 -0700 Subject: [PATCH] Fix #4869 (Updated recipe for Danas newspaper) --- resources/recipes/danas.recipe | 53 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 4de308a57d..081c46a5d2 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -1,64 +1,63 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' danas.rs ''' + import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Danas(BasicNewsRecipe): title = 'Danas' __author__ = 'Darko Miletic' - description = 'Vesti' + description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.' publisher = 'Danas d.o.o.' category = 'news, politics, Serbia' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = False use_embedded_content = False + encoding = 'utf-8' + masthead_url = 'http://www.danas.rs/images/basic/danas.gif' language = 'sr' - lang = 'sr-Latn-RS' - direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher , 'language' : language - , 'pretty_print' : True } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] keep_only_tags = [dict(name='div', attrs={'id':'left'})] remove_tags = [ dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']}) ,dict(name='div', attrs={'id':'comments'}) - ,dict(name=['object','link']) + ,dict(name=['object','link','iframe']) ] - feeds = [ - (u'Vesti' , u'http://www.danas.rs/rss/rss.asp' ) - ,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4') + feeds = [ + (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27') + ,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' ) + ,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24') + ,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' ) + ,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' ) + ,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25') + ,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28') + ,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' ) + ,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13') + ,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42') + ,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19') + ,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' ) ] def preprocess_html(self, soup): - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] + for item in soup.findAll(style=True): + del item['style'] return soup + + def print_version(self, url): + return url + '&action=print' +