# -*- coding: utf-8 -*-
# File: recipes/bugun_gazetesi.recipe
# Origin: patch 0901c5807c5f956e044639350f41cdbb9ebcf07f from Kovid Goyal,
# Tue, 23 Aug 2011 16:08:08 -0600, subject "Various Turkish news sources by
# thomass" (adds the bugun_gazetesi, yagmur_dergisi, yeni_umit_dergisi and
# yenisafak_gazetesi recipes).

from calibre.web.feeds.news import BasicNewsRecipe


class Bugun(BasicNewsRecipe):
    """Recipe for the Turkish newspaper published at www.bugun.com.tr.

    The class is purely declarative: it sets recipe options (metadata,
    styling, which page elements to keep, and the RSS feeds to read) and
    defines no methods of its own.
    """

    # --- Publication metadata -------------------------------------------
    title = u'BUGÜN Gazetesi'
    __author__ = u'thomass'
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    # Was 'newspaper ' (trailing space), which does not equal the documented
    # publication type 'newspaper'.
    publication_type = 'newspaper'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'UTF-8'
    remove_empty_feeds = True

    # --- Output options -------------------------------------------------
    # Reuses the metadata values defined above, so those assignments must
    # stay ahead of this dict in the class body.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Same stylesheet text as before, only split across lines.
    extra_css = (
        ' div{font-size: small}'
        ' h2{font-size: small;font-weight: bold}'
        ' #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '
    )

    # --- Artwork --------------------------------------------------------
    # NOTE(review): the documented BasicNewsRecipe cover option is
    # `cover_url`; `cover_img_url` is left exactly as the author wrote it.
    # Confirm whether anything reads it before renaming.
    cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
    masthead_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'

    # --- Page clean-up --------------------------------------------------
    # Only the headline, summary and body elements of an article are kept.
    # NOTE(review): 'haberGriDivvvv' looks like either a typo or a selector
    # that was disabled on purpose; verify against the site markup.
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['haberBaslik']}),
        dict(name='h2', attrs={'class': ['haberOzet']}),
        dict(name='div', attrs={'class': ['haberGriDivvvv']}),
        dict(name='div', attrs={'id': ['haberTextDiv']}),
    ]

    # --- RSS feeds: (section title, feed URL) ---------------------------
    feeds = [
        (u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'),
        (u'Yazarlar', u'http://www.bugun.com.tr/rss/yazarlar.xml'),
        (u'Gündem', u'http://www.bugun.com.tr/rss/gundem.xml'),
        (u'Ekonomi', u'http://www.bugun.com.tr/rss/ekonomi.xml'),
        (u'Spor', u'http://www.bugun.com.tr/rss/spor.xml'),
        (u'Magazin', u'http://www.bugun.com.tr/rss/magazin.xml'),
        (u'Teknoloji', u'http://www.bugun.com.tr/rss/teknoloji.xml'),
        (u'Yaşam', u'http://www.bugun.com.tr/rss/yasam.xml'),
        (u'Medya', u'http://www.bugun.com.tr/rss/medya.xml'),
        (u'Dünya', u'http://www.bugun.com.tr/rss/dunya.xml'),
        (u'Politika', u'http://www.bugun.com.tr/rss/politika.xml'),
        (u'Sağlık', u'http://www.bugun.com.tr/rss/saglik.xml'),
        (u'Tarifler', u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'),
    ]
# -*- coding: utf-8 -*-
# File: recipes/yagmur_dergisi.recipe

from calibre.web.feeds.news import BasicNewsRecipe


class Yagmur(BasicNewsRecipe):
    """Recipe for Yagmur Dergisi, a quarterly language, culture and
    literature magazine (per its Turkish ``description``).

    Apart from ``print_version`` the class only sets recipe options.
    """

    # Identity of the publication.
    title = u'Yagmur Dergisi'
    description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
    __author__ = u'thomass'
    publisher = 'thomass'
    publication_type = 'magazine'
    language = 'tr'
    category = 'dergi, ilim, kültür, edebiyat,Türkçe'

    # Fetch settings.
    encoding = 'ISO 8859-9'
    oldest_article = 90
    max_articles_per_feed = 100
    no_stylesheets = True

    # Built from the metadata above, so it has to follow those assignments.
    conversion_options = dict(
        tags=category,
        language=language,
        publisher=publisher,
        linearize_tables=True,
    )

    # The same image is used for both attributes.
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
    masthead_url = cover_img_url

    # Single feed, served through open.dapper.net.
    feeds = [(u'Yagmur', u'http://open.dapper.net/services/yagmur')]

    def print_version(self, url):
        """Return ``url`` with the article-page prefix swapped for the
        'yazformati.php' page prefix.

        The original author's note says the regular page has a problem
        caused by its table format. URLs that do not contain the
        article-page prefix are returned unchanged.
        """
        article_prefix = 'http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id='
        print_prefix = 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id='
        return url.replace(article_prefix, print_prefix)
# -*- coding: utf-8 -*-
# File: recipes/yeni_umit_dergisi.recipe

from calibre.web.feeds.news import BasicNewsRecipe


class YeniUmit(BasicNewsRecipe):
    """Recipe for Yeni Umit Dergisi, a monthly religious-sciences and
    culture magazine (per its Turkish ``description``).

    Apart from ``print_version`` the class only sets recipe options.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Yeni Umit Dergisi'
    __author__ = u'thomass'
    description = 'Aylık Dini İlimler ve Kültür Dergisi'
    publisher = 'thomass'
    category = 'dergi, ilim, kültür, edebiyat,Türkçe'
    language = 'tr'
    publication_type = 'magazine'

    # --- Download settings ----------------------------------------------
    oldest_article = 45
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'ISO 8859-9'

    # Reuses the metadata values defined above, so those assignments must
    # stay ahead of this dict in the class body.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # --- Artwork --------------------------------------------------------
    # NOTE(review): the documented BasicNewsRecipe cover option is
    # `cover_url`; `cover_img_url` is left exactly as the author wrote it.
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'

    # --- RSS feeds: (section title, feed URL) ---------------------------
    feeds = [
        (u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
    ]

    def print_version(self, url):
        """Return ``url`` with the '/konular' prefix replaced by '/yazdir'.

        The original author's note says the regular page has a problem
        caused by its table format. URLs that do not contain the
        '/konular' prefix are returned unchanged.
        """
        return url.replace('http://www.yeniumit.com.tr/konular',
                           'http://www.yeniumit.com.tr/yazdir')


# File: recipes/yenisafak_gazetesi.recipe
# (a separate recipe file in the original patch, hence the repeated import)

from calibre.web.feeds.news import BasicNewsRecipe


# Renamed from `Bugun`: that name was carried over from the bugun.com.tr
# recipe and did not describe this publication.
class YeniSafak(BasicNewsRecipe):
    """Recipe for the Turkish newspaper published at yenisafak.com.tr.

    The class is purely declarative: it sets recipe options (metadata,
    styling, which page elements to keep or drop, and the RSS feeds to
    read) and defines no methods of its own.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Yenişafak Gazetesi'
    __author__ = u'thomass'
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    # Was 'newspaper ' (trailing space), which does not equal the documented
    # publication type 'newspaper'.
    publication_type = 'newspaper'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'ISO 8859-9'
    remove_empty_feeds = True

    # --- Output options -------------------------------------------------
    # Reuses the metadata values defined above, so those assignments must
    # stay ahead of this dict in the class body.
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Same stylesheet text as before, only split across lines.
    extra_css = (
        ' h1{font-size:20px;font-weight: bold}'
        'h2{font-size: small;font-weight: bold}'
        'div{font-size: small} '
    )

    # --- Artwork --------------------------------------------------------
    # NOTE(review): the documented BasicNewsRecipe cover option is
    # `cover_url`; `cover_img_url` is left exactly as the author wrote it.
    cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif'
    masthead_url = 'http://yenisafak.com.tr/resim/logo.gif'

    # --- Page clean-up --------------------------------------------------
    # Keep only the article container, then drop these elements inside it.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['ctghaberdetay2010']}),
    ]
    remove_tags = [
        dict(name='div',
             attrs={'id': ['yasaluyari2010', 'divhaberdetayilisik2010']}),
        dict(name='font', attrs={'class': ['haberdetaytarih']}),
    ]

    # --- RSS feeds: (section title, feed URL) ---------------------------
    feeds = [
        (u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'),
        (u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'),
        (u'Politika', u'http://yenisafak.com.tr/rss/?xml=politika'),
        (u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'),
        (u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'),
        (u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'),
        (u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'),
        (u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'),
        (u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'),
        (u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'),
        (u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'),
        (u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'),
        (u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'),
        (u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'),
        (u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'),
        (u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'),
        (u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'),
        (u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'),
        # The leading space in this title is in the original recipe.
        (u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'),
        (u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'),
        (u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'),
    ]