calibre/recipes/todays_zaman.recipe
2013-01-02 23:41:59 +05:30

59 lines
4.3 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class TodaysZaman_en(BasicNewsRecipe):
title = u'Todays Zaman'
__author__ = u'thomass'
description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
oldest_article = 2
max_articles_per_feed =100
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8'
#publisher = ' '
category = 'news, haberler,TR,gazete'
language = 'en_TR'
publication_type = 'newspaper'
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
#keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']}),dict(name='span', attrs={'class':['left-date','detailDate','detailCName']}),dict(name='td', attrs={'id':['newsSpot','newsText']})] #resim ekleme: ,dict(name='div', attrs={'id':['gallery','detailDate',]})
remove_attributes = ['aria-describedby']
remove_tags = [dict(name='img', attrs={'src':['/images/icon_print.gif','http://gmodules.com/ig/images/plus_google.gif','/images/template/jazz/agenda/i1.jpg', 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp']}),dict(name='hr', attrs={'class':[ 'interactive-hr']}),dict(name='div', attrs={'class':[ 'empty_height_18','empty_height_9']}) ,dict(name='td', attrs={'id':[ 'superTitle']}),dict(name='span', attrs={'class':[ 't-count enabled t-count-focus']}),dict(name='a', attrs={'id':[ 'count']}),dict(name='td', attrs={'class':[ 'left-date']}) ]
cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
masthead_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
remove_empty_feeds= True
# remove_attributes = ['width','height']
feeds = [
( u'Home', u'http://www.todayszaman.com/0.rss'),
( u'Sports', u'http://www.todayszaman.com/5.rss'),
( u'Columnists', u'http://www.todayszaman.com/6.rss'),
( u'Interviews', u'http://www.todayszaman.com/9.rss'),
( u'News', u'http://www.todayszaman.com/100.rss'),
( u'National', u'http://www.todayszaman.com/101.rss'),
( u'Diplomacy', u'http://www.todayszaman.com/102.rss'),
( u'World', u'http://www.todayszaman.com/104.rss'),
( u'Business', u'http://www.todayszaman.com/105.rss'),
( u'Op-Ed', u'http://www.todayszaman.com/109.rss'),
( u'Arts & Culture', u'http://www.todayszaman.com/110.rss'),
( u'Features', u'http://www.todayszaman.com/116.rss'),
( u'Travel', u'http://www.todayszaman.com/117.rss'),
( u'Food', u'http://www.todayszaman.com/124.rss'),
( u'Press Review', u'http://www.todayszaman.com/130.rss'),
( u'Expat Zone', u'http://www.todayszaman.com/132.rss'),
( u'Life', u'http://www.todayszaman.com/133.rss'),
( u'Think Tanks', u'http://www.todayszaman.com/159.rss'),
( u'Almanac', u'http://www.todayszaman.com/161.rss'),
( u'Health', u'http://www.todayszaman.com/162.rss'),
( u'Fashion & Beauty', u'http://www.todayszaman.com/163.rss'),
( u'Science & Technology', u'http://www.todayszaman.com/349.rss'),
]
#def preprocess_html(self, soup):
# return self.adeify_images(soup)
#def print_version(self, url): #there is a probem caused by table format
#return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')