Various Turkish news sources by thomass

This commit is contained in:
Kovid Goyal 2011-08-23 16:08:08 -06:00
parent 967285b9f6
commit 0901c5807c
4 changed files with 225 additions and 0 deletions

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Bugun(BasicNewsRecipe):
    """Recipe for the BUGUN Gazetesi RSS feeds served from www.bugun.com.tr.

    Only class-level settings are declared here; all fetching and
    conversion logic is inherited from ``BasicNewsRecipe``.
    """

    title = u'BUGÜN Gazetesi'
    __author__ = u'thomass'

    # Feed limits: article age (oldest_article) and per-feed article count.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'UTF-8'
    remove_empty_feeds = True

    # Metadata; the three names below are reused in conversion_options.
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    # Fixed: the value used to be 'newspaper ' (trailing space), which is
    # not one of the documented values newspaper / magazine / blog.
    publication_type = 'newspaper'

    extra_css = (
        ' div{font-size: small}'
        ' h2{font-size: small;font-weight: bold}'
        ' #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '
    )

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # NOTE(review): nothing in this recipe reads cover_img_url; the
    # documented BasicNewsRecipe attribute is cover_url. Confirm whether the
    # logo was meant to be used as the cover before renaming.
    cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
    masthead_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'

    # Page elements kept from each article.
    # NOTE(review): 'haberGriDivvvv' looks mistyped or deliberately
    # disabled; verify against the site's markup before changing it.
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['haberBaslik']}),
        dict(name='h2', attrs={'class': ['haberOzet']}),
        dict(name='div', attrs={'class': ['haberGriDivvvv']}),
        dict(name='div', attrs={'id': ['haberTextDiv']}),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'),
        (u'Yazarlar', u'http://www.bugun.com.tr/rss/yazarlar.xml'),
        (u'Gündem', u'http://www.bugun.com.tr/rss/gundem.xml'),
        (u'Ekonomi', u'http://www.bugun.com.tr/rss/ekonomi.xml'),
        (u'Spor', u'http://www.bugun.com.tr/rss/spor.xml'),
        (u'Magazin', u'http://www.bugun.com.tr/rss/magazin.xml'),
        (u'Teknoloji', u'http://www.bugun.com.tr/rss/teknoloji.xml'),
        (u'Yaşam', u'http://www.bugun.com.tr/rss/yasam.xml'),
        (u'Medya', u'http://www.bugun.com.tr/rss/medya.xml'),
        (u'Dünya', u'http://www.bugun.com.tr/rss/dunya.xml'),
        (u'Politika', u'http://www.bugun.com.tr/rss/politika.xml'),
        (u'Sağlık', u'http://www.bugun.com.tr/rss/saglik.xml'),
        (u'Tarifler', u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'),
    ]

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Yagmur(BasicNewsRecipe):
    """Recipe for Yagmur Dergisi, read from a single open.dapper.net feed.

    Declares class-level settings plus ``print_version``; everything else
    is inherited from ``BasicNewsRecipe``.
    """

    title = u'Yagmur Dergisi'
    __author__ = u'thomass'
    # Unicode literals: these values contain non-ASCII characters, so they
    # must not be plain byte strings when run under Python 2 (matches the
    # u'' style already used for the title).
    description = u'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
    category = u'dergi, ilim, kültür, edebiyat,Türkçe'

    # Feed limits: article age (oldest_article) and per-feed article count.
    oldest_article = 90
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'ISO 8859-9'
    publisher = 'thomass'

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # NOTE(review): nothing in this recipe reads cover_img_url; the
    # documented BasicNewsRecipe attribute is cover_url. Confirm intent.
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'

    # (section title, feed URL) pairs.
    feeds = [
        (u'Yagmur', u'http://open.dapper.net/services/yagmur'),
    ]

    def print_version(self, url):
        """Return the 'yazformati.php' URL for an article URL.

        The 'konu_goster.php?konu_id=' prefix is swapped for
        'yazformati.php?konu_id='; a URL without that prefix is returned
        unchanged. Per the original author's note, this works around a
        problem caused by the table format of the regular page.
        """
        article_prefix = 'http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id='
        print_prefix = 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id='
        return url.replace(article_prefix, print_prefix)

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class YeniUmit(BasicNewsRecipe):
    """Recipe for Yeni Umit Dergisi, read from a single open.dapper.net feed.

    Declares class-level settings plus ``print_version``; everything else
    is inherited from ``BasicNewsRecipe``.
    """

    title = u'Yeni Umit Dergisi'
    __author__ = u'thomass'
    # Unicode literals: these values contain non-ASCII characters, so they
    # must not be plain byte strings when run under Python 2 (matches the
    # u'' style already used for the title).
    description = u'Aylık Dini İlimler ve Kültür Dergisi'
    category = u'dergi, ilim, kültür, edebiyat,Türkçe'

    # Feed limits: article age (oldest_article) and per-feed article count.
    oldest_article = 45
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'ISO 8859-9'
    publisher = 'thomass'

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # NOTE(review): nothing in this recipe reads cover_img_url; the
    # documented BasicNewsRecipe attribute is cover_url. Confirm intent.
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'

    # (section title, feed URL) pairs.
    feeds = [
        (u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
    ]

    def print_version(self, url):
        """Return the 'yazdir' URL for an article URL.

        The '/konular' path prefix is swapped for '/yazdir'; a URL without
        that prefix is returned unchanged. Per the original author's note,
        this works around a problem caused by the table format of the
        regular page.
        """
        article_prefix = 'http://www.yeniumit.com.tr/konular'
        print_prefix = 'http://www.yeniumit.com.tr/yazdir'
        return url.replace(article_prefix, print_prefix)

View File

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
# NOTE(review): the class name 'Bugun' does not match this recipe's title
# (Yenisafak Gazetesi) and looks like a copy/paste leftover. It is kept
# unchanged here because it is the block's public name.
class Bugun(BasicNewsRecipe):
    """Recipe for the Yenisafak Gazetesi RSS feeds served from yenisafak.com.tr.

    Only class-level settings are declared here; all fetching and
    conversion logic is inherited from ``BasicNewsRecipe``.
    """

    title = u'Yenişafak Gazetesi'
    __author__ = u'thomass'

    # Feed limits: article age (oldest_article) and per-feed article count.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'ISO 8859-9'  # the original author left 'UTF-8' as a commented alternative
    remove_empty_feeds = True

    # Metadata; the three names below are reused in conversion_options.
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    # Fixed: the value used to be 'newspaper ' (trailing space), which is
    # not one of the documented values newspaper / magazine / blog.
    publication_type = 'newspaper'

    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # NOTE(review): nothing in this recipe reads cover_img_url; the
    # documented BasicNewsRecipe attribute is cover_url. Confirm intent.
    cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif'
    masthead_url = 'http://yenisafak.com.tr/resim/logo.gif'

    # Page element kept from each article.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['ctghaberdetay2010']}),
    ]

    extra_css = (
        ' h1{font-size:20px;font-weight: bold}'
        'h2{font-size: small;font-weight: bold}'
        'div{font-size: small} '
    )

    # Elements stripped from the kept content.
    remove_tags = [
        dict(name='div', attrs={'id': ['yasaluyari2010', 'divhaberdetayilisik2010']}),
        dict(name='font', attrs={'class': ['haberdetaytarih']}),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'),
        (u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'),
        (u'Politika', u'http://yenisafak.com.tr/rss/?xml=politika'),
        (u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'),
        (u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'),
        (u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'),
        (u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'),
        (u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'),
        (u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'),
        (u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'),
        (u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'),
        (u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'),
        (u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'),
        (u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'),
        (u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'),
        (u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'),
        (u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'),
        (u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'),
        (u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'),
        (u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'),
        (u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'),
    ]