From 1e90c3b4308badb0d0d0aa49b2fe3cf242931169 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 17 Aug 2014 08:23:04 +0530 Subject: [PATCH] Update Radikal (Turkey). Fixes #1357794 [Updated recipe for Turkish radikal](https://bugs.launchpad.net/calibre/+bug/1357794) --- recipes/radikal_tr.recipe | 57 ++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/recipes/radikal_tr.recipe b/recipes/radikal_tr.recipe index 601caf9a1a..4fa65213f5 100644 --- a/recipes/radikal_tr.recipe +++ b/recipes/radikal_tr.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2014, Darko Miletic ' ''' radikal.com.tr ''' @@ -12,17 +12,18 @@ class Radikal_tr(BasicNewsRecipe): description = 'News from Turkey' publisher = 'radikal' category = 'news, politics, Turkey' - oldest_article = 7 + oldest_article = 2 max_articles_per_feed = 150 no_stylesheets = True - encoding = 'cp1254' + #encoding = 'iso-8859-9' use_embedded_content = False - auto_cleanup = True - #auto_cleanup_keep = '//div[@class="thumbnail"]' + remove_empty_feeds = True + auto_cleanup = False masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg' language = 'tr' + publication_type = 'newspaper' extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif} + body{font-family: 'PT Sans',Arial,Helvetica,sans1,sans-serif} """ conversion_options = { @@ -32,26 +33,28 @@ class Radikal_tr(BasicNewsRecipe): , 'language' : language } - #remove_tags = [dict(name=['embed','iframe','object','link','base'])] - #remove_tags_before = dict(name='h1') - #remove_tags_after = dict(attrs={'id':'haberDetayYazi'}) - - + remove_tags = [ + dict(name=['meta','iframe','embed','object','link','base']), + dict(name='div', attrs={'class':['options','news_related','browserWidth_shareBox']}), + dict(attrs={'class':['breadcrumb clearfix','box_title']}) + ] + + keep_only_tags = [ + dict(attrs={'class':['news-content-header', + 'news-content-text clearfix', + 'author-content-text', + 'news_detail_top', + 'news_article']}) + ] + feeds = [ - (u'Yazarlar' , u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml') - ,(u'Turkiye' , u'http://www.radikal.com.tr/d/rss/Rss_97.xml' ) - ,(u'Politika' , u'http://www.radikal.com.tr/d/rss/Rss_98.xml' ) - ,(u'Dis Haberler', u'http://www.radikal.com.tr/d/rss/Rss_100.xml' ) - ,(u'Ekonomi' , u'http://www.radikal.com.tr/d/rss/Rss_101.xml' ) - ,(u'Radikal Iki' , u'http://www.radikal.com.tr/d/rss/Rss_42.xml') - ,(u'Radikal Hayat' , u'http://www.radikal.com.tr/d/rss/Rss_41.xml' ) - ,(u'Radikal Kitap' , u'http://www.radikal.com.tr/d/rss/Rss_40.xml' ) + (u'Yazarlar' , u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml') + ,(u'Türkiye' , u'http://www.radikal.com.tr/d/rss/Rss_77.xml' ) + ,(u'Politika' , u'http://www.radikal.com.tr/d/rss/Rss_78.xml' ) + ,(u'Dünya' , u'http://www.radikal.com.tr/d/rss/Rss_81.xml' ) + ,(u'Ekonomi' , u'http://www.radikal.com.tr/d/rss/Rss_80.xml' ) + ,(u'Radikal 2' , u'http://www.radikal.com.tr/d/rss/Rss_42.xml' ) + ,(u'Radikal Hayat' , u'http://www.radikal.com.tr/d/rss/Rss_41.xml' ) + ,(u'Radikal Kitap' , u'http://www.radikal.com.tr/d/rss/Rss_40.xml' ) + ,(u'Spor' , u'http://www.radikal.com.tr/d/rss/Rss_84.xml' ) ] - - #def print_version(self, url): - #articleid = url.rpartition('ArticleID=')[2] - #return 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID=' + articleid - - #def preprocess_html(self, soup): - #return self.adeify_images(soup) -