New recipes for HRT and RTS by Darko Miletic

This commit is contained in:
Kovid Goyal 2009-05-13 09:42:48 -07:00
parent 1a1cf7f1b9
commit 1c56b03f95
5 changed files with 127 additions and 1 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 606 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 458 B

View File

@ -42,7 +42,7 @@ recipe_modules = ['recipe_' + r for r in (
'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna', 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms', 'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews', 'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
'straitstimes', 'index_hu', 'pcworld_hu', 'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.hrt.hr
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class HRT(BasicNewsRecipe):
    """Recipe that collects the RSS feeds published by www.hrt.hr."""

    # NOTE(review): "Vesti" is the spelling used by the Serbian recipe; the
    # first feed below is called "Vijesti" -- confirm the title is intended.
    title = 'HRT: Vesti'
    __author__ = 'Darko Miletic'
    description = 'News from Croatia'
    publisher = 'HRT'
    category = 'news, politics, Croatia, HRT'

    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    # `_` is not defined in this file; presumably the translation function
    # installed as a builtin by the host application -- TODO confirm.
    language = _("Croatian")
    # Language tag written into every page by preprocess_html().
    lang = 'hr-HR'

    extra_css = (
        '@font-face {font-family: "serif1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' body{font-family: serif1, serif}'
        ' .article_description{font-family: serif1, serif}'
    )

    # Flat list of option-name / value pairs built from the metadata above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # Newline-separated key="value" settings built from the same metadata.
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'override_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"',
    ])

    # Swap every U+0110 for U+00D0 in the downloaded markup (presumably
    # because the device font lacks the former glyph -- TODO confirm).
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda m: u'\u00D0'),
    ]

    # Tag selectors handed to the base recipe.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'bigVijest'}),
    ]
    remove_tags = [
        dict(name=['object', 'link', 'embed']),
    ]
    remove_tags_after = dict(name='div', attrs={'class': 'nsAuthor'})

    # (section title, feed URL) pairs.
    feeds = [
        (u'Vijesti', u'http://www.hrt.hr/?id=316&type=100&rss=vijesti'),
        (u'Sport', u'http://www.hrt.hr/?id=316&type=100&rss=sport'),
        (u'Zabava', u'http://www.hrt.hr/?id=316&type=100&rss=zabava'),
        (u'Filmovi i serije', u'http://www.hrt.hr/?id=316&type=100&rss=filmovi'),
        (u'Dokumentarni program', u'http://www.hrt.hr/?id=316&type=100&rss=dokumentarci'),
        (u'Glazba', u'http://www.hrt.hr/?id=316&type=100&rss=glazba'),
        (u'Kultura', u'http://www.hrt.hr/?id=316&type=100&rss=kultura'),
        (u'Mladi', u'http://www.hrt.hr/?id=316&type=100&rss=mladi'),
        (u'Manjine', u'http://www.hrt.hr/?id=316&type=100&rss=manjine'),
        (u'Radio', u'http://www.hrt.hr/?id=316&type=100&rss=radio'),
    ]

    def preprocess_html(self, soup):
        """Tag the page with its language and charset, drop inline styles.

        Sets ``xml:lang`` and ``lang`` on the ``<html>`` element, inserts
        Content-Language and Content-Type ``<meta>`` tags as the first two
        children of ``<head>``, deletes every ``style`` attribute, and
        returns the result of ``self.adeify_images(soup)``.
        """
        for attr_name in ('xml:lang', 'lang'):
            soup.html[attr_name] = self.lang

        meta_specs = (
            ("Content-Language", self.lang),
            ("Content-Type", "text/html; charset=UTF-8"),
        )
        new_metas = [
            Tag(soup, 'meta', [("http-equiv", equiv), ("content", value)])
            for equiv, value in meta_specs
        ]
        for position, meta_tag in enumerate(new_metas):
            soup.head.insert(position, meta_tag)

        for styled in soup.findAll(style=True):
            del styled['style']

        return self.adeify_images(soup)

View File

@ -0,0 +1,60 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.rts.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class RTS(BasicNewsRecipe):
    """Recipe that collects the RSS feeds published by www.rts.rs."""

    title = 'RTS: Vesti'
    __author__ = 'Darko Miletic'
    description = 'News from Serbia'
    publisher = 'RTS'
    category = 'news, politics, Serbia, RTS'

    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    # `_` is not defined in this file; presumably the translation function
    # installed as a builtin by the host application -- TODO confirm.
    language = _("Serbian")
    # Language tag written into every page by preprocess_html().
    lang = 'sr-Latn-RS'

    extra_css = (
        '@font-face {font-family: "serif1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' body{font-family: serif1, serif}'
        ' .article_description{font-family: serif1, serif}'
    )

    # Flat list of option-name / value pairs built from the metadata above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # Newline-separated key="value" settings built from the same metadata.
    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'override_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"',
    ])

    # Swap every U+0110 for U+00D0 in the downloaded markup (presumably
    # because the device font lacks the former glyph -- TODO confirm).
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda m: u'\u00D0'),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Vesti', u'http://www.rts.rs/page/stories/sr/rss.html'),
        (u'Srbija', u'http://www.rts.rs/page/stories/sr/rss/9/Srbija.html'),
        (u'Region', u'http://www.rts.rs/page/stories/sr/rss/11/Region.html'),
        (u'Svet', u'http://www.rts.rs/page/stories/sr/rss/10/Svet.html'),
        (u'Hronika', u'http://www.rts.rs/page/stories/sr/rss/135/Hronika.html'),
        (u'Drustvo', u'http://www.rts.rs/page/stories/sr/rss/125/Dru%C5%A1tvo.html'),
        (u'Ekonomija', u'http://www.rts.rs/page/stories/sr/rss/13/Ekonomija.html'),
        (u'Nauka', u'http://www.rts.rs/page/stories/sr/rss/14/Nauka.html'),
        (u'Kultura', u'http://www.rts.rs/page/stories/sr/rss/16/Kultura.html'),
        (u'Zanimljivosti', u'http://www.rts.rs/page/stories/sr/rss/15/Zanimljivosti.html'),
        (u'Sport', u'http://www.rts.rs/page/sport/sr/rss.html'),
    ]

    def preprocess_html(self, soup):
        """Tag the page with its language and charset.

        Sets ``xml:lang`` and ``lang`` on the ``<html>`` element, inserts
        Content-Language and Content-Type ``<meta>`` tags as the first two
        children of ``<head>``, and returns the result of
        ``self.adeify_images(soup)``.
        """
        for attr_name in ('xml:lang', 'lang'):
            soup.html[attr_name] = self.lang

        meta_specs = (
            ("Content-Language", self.lang),
            ("Content-Type", "text/html; charset=UTF-8"),
        )
        new_metas = [
            Tag(soup, 'meta', [("http-equiv", equiv), ("content", value)])
            for equiv, value in meta_specs
        ]
        for position, meta_tag in enumerate(new_metas):
            soup.head.insert(position, meta_tag)

        return self.adeify_images(soup)