From 578cc310c2ec212eb59cbfec1436340649e23a68 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 13 Feb 2009 10:36:34 -0800 Subject: [PATCH] Implement #1843 (Various updated recipes for better EPUB support) --- src/calibre/web/feeds/recipes/recipe_b92.py | 34 ++++++++++--------- src/calibre/web/feeds/recipes/recipe_blic.py | 20 +++++------ src/calibre/web/feeds/recipes/recipe_danas.py | 20 +++++------ .../web/feeds/recipes/recipe_elargentino.py | 8 ++--- .../web/feeds/recipes/recipe_granma.py | 16 ++++----- .../web/feeds/recipes/recipe_infobae.py | 21 +++++++----- .../web/feeds/recipes/recipe_jutarnji.py | 31 ++++++++--------- .../feeds/recipes/recipe_juventudrebelde.py | 18 +++++----- src/calibre/web/feeds/recipes/recipe_nin.py | 19 +++++------ .../web/feeds/recipes/recipe_novosti.py | 19 +++++------ src/calibre/web/feeds/recipes/recipe_nspm.py | 32 +++++++++-------- .../web/feeds/recipes/recipe_pescanik.py | 23 ++++++------- .../web/feeds/recipes/recipe_politika.py | 13 ++++--- .../web/feeds/recipes/recipe_vijesti.py | 21 +++++------- src/calibre/web/feeds/recipes/recipe_vreme.py | 26 ++++++++------ 15 files changed, 160 insertions(+), 161 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_b92.py b/src/calibre/web/feeds/recipes/recipe_b92.py index 611647620b..4926de82f5 100644 --- a/src/calibre/web/feeds/recipes/recipe_b92.py +++ b/src/calibre/web/feeds/recipes/recipe_b92.py @@ -1,15 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' b92.net ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class B92(BasicNewsRecipe): title = 'B92' __author__ = 'Darko Miletic' @@ -22,19 +21,22 @@ class B92(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False cover_url = 'http://static.b92.net/images/fp/logo.gif' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + , '--ignore-tables' + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ] - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + feeds = [ (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml') ,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' ) @@ -54,9 +56,10 @@ class B92(BasicNewsRecipe): return nurl def preprocess_html(self, soup): - soup.html['xml:lang'] = 'sr-Latn' - soup.html['lang'] = 'sr-Latn' - mtag = '' + lng = 'sr-Latn-RS' + soup.html['xml:lang'] = lng + soup.html['lang'] = lng + mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] @@ -64,4 +67,3 @@ class B92(BasicNewsRecipe): del item['align'] item.insert(0,'

') return soup - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py index ae75394fec..05d4e43865 100644 --- a/src/calibre/web/feeds/recipes/recipe_blic.py +++ b/src/calibre/web/feeds/recipes/recipe_blic.py @@ -1,15 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' blic.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Blic(BasicNewsRecipe): title = u'Blic' __author__ = u'Darko Miletic' @@ -21,15 +20,17 @@ class Blic(BasicNewsRecipe): remove_javascript = True no_stylesheets = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -44,10 +45,9 @@ class Blic(BasicNewsRecipe): return u'http://www.blic.rs/_print.php?' + rest_url def preprocess_html(self, soup): - mtag = '' + mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_danas.py b/src/calibre/web/feeds/recipes/recipe_danas.py index f9c05e7b20..63a7b45738 100644 --- a/src/calibre/web/feeds/recipes/recipe_danas.py +++ b/src/calibre/web/feeds/recipes/recipe_danas.py @@ -1,14 +1,13 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' danas.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Danas(BasicNewsRecipe): title = u'Danas' __author__ = 'Darko Miletic' @@ -20,15 +19,17 @@ class Danas(BasicNewsRecipe): no_stylesheets = False remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -43,9 +44,8 @@ class Danas(BasicNewsRecipe): feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')] def preprocess_html(self, soup): - mtag = '' + mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] - return soup - language = _('Serbian') \ No newline at end of file + return soup \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_elargentino.py b/src/calibre/web/feeds/recipes/recipe_elargentino.py index 1801c81b81..5c7d314f24 100644 --- a/src/calibre/web/feeds/recipes/recipe_elargentino.py +++ b/src/calibre/web/feeds/recipes/recipe_elargentino.py @@ -5,9 +5,8 @@ __copyright__ = '2008-2009, Darko Miletic ' ''' elargentino.com ''' +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class ElArgentino(BasicNewsRecipe): title = 'ElArgentino.com' __author__ = 'Darko Miletic' @@ -21,9 +20,10 @@ class ElArgentino(BasicNewsRecipe): use_embedded_content = False encoding = 'utf8' cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png' + language = _('Spanish') html2lrf_options = [ - '--comment', description + '--comment', description , '--category', category , '--publisher', publisher ] @@ -59,5 +59,3 @@ class ElArgentino(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_granma.py b/src/calibre/web/feeds/recipes/recipe_granma.py index 66ebba1d64..c758477a52 100644 --- a/src/calibre/web/feeds/recipes/recipe_granma.py +++ b/src/calibre/web/feeds/recipes/recipe_granma.py @@ -1,14 +1,12 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' granma.cubaweb.cu ''' import urllib -from calibre.web.feeds.news import BasicNewsRecipe - class Granma(BasicNewsRecipe): title = 'Diario Granma' __author__ = 'Darko Miletic' @@ -21,18 +19,21 @@ class Granma(BasicNewsRecipe): use_embedded_content = False encoding = 'cp1252' cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg' + language = _('Spanish') remove_javascript = True html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' keep_only_tags = [dict(name='table', attrs={'height':'466'})] + + remove_tags = [dict(name=['embed','link','object'])] feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )] @@ -48,5 +49,4 @@ class Granma(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Spanish') \ No newline at end of file + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_infobae.py b/src/calibre/web/feeds/recipes/recipe_infobae.py index 40e720f94c..13c52ca6b1 100644 --- a/src/calibre/web/feeds/recipes/recipe_infobae.py +++ b/src/calibre/web/feeds/recipes/recipe_infobae.py @@ -6,29 +6,36 @@ __copyright__ = '2008-2009, Darko Miletic ' infobae.com ''' -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre.web.feeds.news import BasicNewsRecipe + class Infobae(BasicNewsRecipe): title = 'Infobae.com' __author__ = 'Darko Miletic' description = 'Informacion Libre las 24 horas' publisher = 'Infobae.com' category = 'news, politics, Argentina' - oldest_article = 2 + oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + language = _('Spanish') encoding = 'iso-8859-1' cover_url = 'http://www.infobae.com/imgs/header/header.gif' remove_javascript = True html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + + remove_tags = [ + dict(name=['embed','link','object']) + ,dict(name='a', attrs={'onclick':'javascript:window.print()'}) + ] feeds = [ (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) @@ -48,5 +55,3 @@ class Infobae(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Spanish') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_jutarnji.py b/src/calibre/web/feeds/recipes/recipe_jutarnji.py index 03c22c9b99..e8826bc4e1 100644 --- a/src/calibre/web/feeds/recipes/recipe_jutarnji.py +++ b/src/calibre/web/feeds/recipes/recipe_jutarnji.py @@ -1,47 +1,46 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' jutarnji.hr ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Jutarnji(BasicNewsRecipe): title = u'Jutarnji' __author__ = u'Darko Miletic' description = u'Hrvatski portal' publisher = 'Jutarnji.hr' category = 'news, politics, Croatia' - oldest_article = 2 + oldest_article = 1 max_articles_per_feed = 100 - simultaneous_downloads = 1 - delay = 1 - language = _('Croatian') + simultaneous_downloads = 2 + delay = 1 + language = _('Croatian') no_stylesheets = True use_embedded_content = False remove_javascript = True encoding = 'cp1250' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ - dict(name='embed') + dict(name=['embed','hr','link','object']) ,dict(name='a', attrs={'class':'a11'}) - ,dict(name='hr') ] feeds = [ @@ -60,13 +59,11 @@ class Jutarnji(BasicNewsRecipe): return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest def preprocess_html(self, soup): - mtag = '' + mtag = '\n' soup.head.insert(0,mtag) - mtag = '' - soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] for item in soup.findAll(width=True): del item['width'] return soup - + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py index eea510a7cd..bb8e645fbe 100644 --- a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py +++ b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py @@ -1,14 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' juventudrebelde.cu ''' -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + class Juventudrebelde(BasicNewsRecipe): title = 'Juventud Rebelde' __author__ = 'Darko Miletic' @@ -20,17 +20,18 @@ class Juventudrebelde(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False encoding = 'cp1252' + language = _('Spanish') cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg') remove_javascript = True html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' keep_only_tags = [dict(name='div', attrs={'id':'noticia'})] @@ -50,5 +51,4 @@ class Juventudrebelde(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Spanish') \ No newline at end of file + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nin.py b/src/calibre/web/feeds/recipes/recipe_nin.py index d180f2b221..85019b07ea 100644 --- a/src/calibre/web/feeds/recipes/recipe_nin.py +++ b/src/calibre/web/feeds/recipes/recipe_nin.py @@ -1,15 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' nin.co.yu ''' import re, urllib +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Nin(BasicNewsRecipe): title = 'NIN online' __author__ = 'Darko Miletic' @@ -27,15 +26,17 @@ class Nin(BasicNewsRecipe): LOGIN = PREFIX + '/?logout=true' remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' - + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -69,5 +70,3 @@ class Nin(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_novosti.py b/src/calibre/web/feeds/recipes/recipe_novosti.py index 136302c573..0190307542 100644 --- a/src/calibre/web/feeds/recipes/recipe_novosti.py +++ b/src/calibre/web/feeds/recipes/recipe_novosti.py @@ -1,15 +1,14 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' novosti.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Novosti(BasicNewsRecipe): title = u'Vecernje Novosti' __author__ = u'Darko Miletic' @@ -22,15 +21,17 @@ class Novosti(BasicNewsRecipe): use_embedded_content = False encoding = 'utf8' remove_javascript = True - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -40,10 +41,8 @@ class Novosti(BasicNewsRecipe): feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')] def preprocess_html(self, soup): - mtag = '' + mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_nspm.py b/src/calibre/web/feeds/recipes/recipe_nspm.py index 4cc6d50ca0..0ff80b8a93 100644 --- a/src/calibre/web/feeds/recipes/recipe_nspm.py +++ b/src/calibre/web/feeds/recipes/recipe_nspm.py @@ -1,41 +1,44 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' nspm.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Nspm(BasicNewsRecipe): title = u'Nova srpska politicka misao' __author__ = 'Darko Miletic' description = 'Casopis za politicku teoriju i drustvena istrazivanja' publisher = 'NSPM' category = 'news, politics, Serbia' - oldest_article = 7 + oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False INDEX = 'http://www.nspm.rs/?alphabet=l' encoding = 'utf8' remove_javascript = True - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - remove_tags = [dict(name='a')] + remove_tags = [ + dict(name=['a','img','link','object','embed']) + ,dict(name='td', attrs={'class':'buttonheading'}) + ] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -48,13 +51,12 @@ class Nspm(BasicNewsRecipe): return url.replace('.html','/stampa.html') def preprocess_html(self, soup): - soup.html['xml:lang'] = 'sr-Latn-RS' - soup.html['lang'] = 'sr-Latn-RS' + lng = 'sr-Latn-RS' + soup.html['xml:lang'] = lng + soup.html['lang'] = lng ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'}) if ftag: - ftag['content'] = 'sr-Latn-RS' + ftag['content'] = lng for item in soup.findAll(style=True): - del item['style'] + del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_pescanik.py b/src/calibre/web/feeds/recipes/recipe_pescanik.py index e3385e02aa..278ed38183 100644 --- a/src/calibre/web/feeds/recipes/recipe_pescanik.py +++ b/src/calibre/web/feeds/recipes/recipe_pescanik.py @@ -1,45 +1,46 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' pescanik.net ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Pescanik(BasicNewsRecipe): title = 'Pescanik' __author__ = 'Darko Miletic' description = 'Pescanik' publisher = 'Pescanik' category = 'news, politics, Serbia' - oldest_article = 7 + oldest_article = 5 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False remove_javascript = True encoding = 'utf8' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png" + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png" preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] remove_tags = [ dict(name='td' , attrs={'class':'buttonheading'}) ,dict(name='span', attrs={'class':'article_seperator'}) - ,dict(name=['object','link']) + ,dict(name=['object','link','img','h4','ul']) ] feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')] @@ -54,5 +55,3 @@ class Pescanik(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_politika.py b/src/calibre/web/feeds/recipes/recipe_politika.py index f1d84915ce..93c8f43b36 100644 --- a/src/calibre/web/feeds/recipes/recipe_politika.py +++ b/src/calibre/web/feeds/recipes/recipe_politika.py @@ -6,9 +6,8 @@ __copyright__ = '2008, Darko Miletic ' politika.rs ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Politika(BasicNewsRecipe): title = u'Politika Online' __author__ = 'Darko Miletic' @@ -16,16 +15,16 @@ class Politika(BasicNewsRecipe): publisher = 'Politika novine i Magazini d.o.o' category = 'news, politics, Serbia' oldest_article = 2 - language = _('Serbian') max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False remove_javascript = True encoding = 'utf8' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description + '--comment', description , '--category', category , '--publisher', publisher ] @@ -61,6 +60,6 @@ class Politika(BasicNewsRecipe): for item in soup.findAll(style=True): del item['style'] ftag = soup.find('div',attrs={'class':'content_center_border'}) - if ftag: - ftag['align'] = 'left' + if ftag.has_key('align'): + del ftag['align'] return soup diff --git a/src/calibre/web/feeds/recipes/recipe_vijesti.py b/src/calibre/web/feeds/recipes/recipe_vijesti.py index 98a7736a96..9923193d7b 100644 --- a/src/calibre/web/feeds/recipes/recipe_vijesti.py +++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py @@ -4,13 +4,12 @@ __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic ' ''' -vijesti.cg.yu +vijesti.me ''' import re +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Vijesti(BasicNewsRecipe): title = 'Vijesti' __author__ = 'Darko Miletic' @@ -22,13 +21,14 @@ class Vijesti(BasicNewsRecipe): no_stylesheets = True remove_javascript = True encoding = 'cp1250' - cover_url = 'http://www.vijesti.cg.yu/img/logo.gif' + cover_url = 'http://www.vijesti.me/img/logo.gif' remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description + '--comment', description , '--category', category , '--publisher', publisher ] @@ -39,12 +39,9 @@ class Vijesti(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})] - remove_tags = [ - dict(name='div', attrs={'align':'right'}) - ,dict(name=['object','link']) - ] + remove_tags = [dict(name=['object','link','embed'])] - feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )] + feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )] def preprocess_html(self, soup): soup.html['xml:lang'] = 'sr-Latn-ME' @@ -56,5 +53,3 @@ class Vijesti(BasicNewsRecipe): del item['align'] item.insert(0,'

') return soup - - language = _('Serbian') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_vreme.py b/src/calibre/web/feeds/recipes/recipe_vreme.py index 27697acf8e..c78e956d29 100644 --- a/src/calibre/web/feeds/recipes/recipe_vreme.py +++ b/src/calibre/web/feeds/recipes/recipe_vreme.py @@ -1,16 +1,15 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2009, Darko Miletic ' ''' vreme.com ''' import re from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe -from calibre.web.feeds.news import BasicNewsRecipe - class Vreme(BasicNewsRecipe): title = 'Vreme' __author__ = 'Darko Miletic' @@ -24,15 +23,17 @@ class Vreme(BasicNewsRecipe): LOGIN = 'http://www.vreme.com/account/index.php' remove_javascript = True use_embedded_content = False - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}' + language = _('Serbian') + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ - '--comment', description - , '--category', category + '--comment' , description + , '--category' , category , '--publisher', publisher + , '--ignore-tables' ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] @@ -87,14 +88,19 @@ class Vreme(BasicNewsRecipe): del soup.body['text' ] del soup.body['bgcolor'] del soup.body['onload' ] - mtag = '' + for item in soup.findAll('table'): + if item.has_key('width'): + del item['width'] + if item.has_key('height'): + del item['height'] + mtag = '' soup.head.insert(0,mtag) tbl = soup.body.table tbbb = soup.find('td') if tbbb: tbbb.extract() tbl.extract() - soup.body.insert(0,tbbb) + soup.body.insert(0,tbbb) return soup def get_cover_url(self): @@ -104,5 +110,3 @@ class Vreme(BasicNewsRecipe): if cover_item: cover_url = self.INDEX + cover_item['src'] return cover_url - - language = _('Serbian') \ No newline at end of file