Fix #1791 (Major recipe update)

2025-07-09 03:04:10 -04:00 · 2009-02-07 13:39:53 -08:00 · 2009-02-07 13:39:53 -08:00 · 1145b768dc
commit 1145b768dc
parent 1c9c8870d2
31 changed files with 808 additions and 394 deletions
--- a/src/calibre/web/feeds/recipes/recipe_ambito.py
+++ b/src/calibre/web/feeds/recipes/recipe_ambito.py
@ -1,32 +1,39 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 ambito.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Ambito(BasicNewsRecipe):
    title                 = 'Ambito.com'
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
    publisher             = 'Ambito.com'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
-    use_embedded_content  = False
+    encoding              = 'iso-8859-1'
    encoding              = 'iso--8859-1'
    language              = _('Spanish')
    cover_url             = 'http://www.ambito.com/img/logo_.jpg'
    remove_javascript     = True
    use_embedded_content  = False
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Argentina'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags    = [dict(name='div', attrs={'align':'justify'})]
    remove_tags       = [dict(name=['object','link'])]
    feeds = [ 
              (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp'                         )
             ,(u'Economia'            , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa'             )
@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe):
    def print_version(self, url):
        '''
        Return ``url`` with the ``noticia.asp?`` article prefix replaced by
        the ``noticias/imprimir.asp?`` prefix.

        Uses a plain substring replace, so a ``url`` that does not contain
        the article prefix is returned unchanged.
        '''
        return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
    def preprocess_html(self, soup):
        '''
        Insert a Content-Language (es-AR) meta string at position 0 of
        ``soup.head`` and delete the ``style`` attribute of every element
        returned by ``soup.findAll(style=True)``.

        Returns the same ``soup`` object that was passed in.
        '''
        # NOTE(review): mtag is inserted as a plain string rather than a
        # parsed tag - presumably the soup emits it verbatim; confirm.
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language              = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_b92.py
+++ b/src/calibre/web/feeds/recipes/recipe_b92.py
@ -7,25 +7,33 @@ b92.net
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class B92(BasicNewsRecipe):
-    title                 = u'B92'
+    title                 = 'B92'
    __author__            = 'Darko Miletic'
    language = _('Serbian')
    description           = 'Dnevne vesti iz Srbije i sveta'    
-    oldest_article        = 7
+    oldest_article        = 2
    publisher             = 'B92.net'
    category              = 'news, politics, Serbia'    
    max_articles_per_feed = 100
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
    cover_url = 'http://static.b92.net/images/fp/logo.gif'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Serbia'
+                        , '--publisher', publisher
                        , '--publisher', 'B92'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    feeds          = [
                        (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
@ -44,3 +52,16 @@ class B92(BasicNewsRecipe):
        if biz:
            nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
        return nurl
    def preprocess_html(self, soup):
        '''
        Mark the document as ``sr-Latn`` and strip presentational attributes.

        Sets ``xml:lang`` and ``lang`` on ``soup.html``, inserts a
        Content-Language meta string at position 0 of ``soup.head``, deletes
        the ``style`` attribute of every element that has one, and for every
        ``img`` that has an ``align`` attribute deletes that attribute and
        inserts a ``<br /><br />`` string at position 0 of the ``img``.

        Returns the same ``soup`` object that was passed in.
        '''
        soup.html['xml:lang'] = 'sr-Latn'
        soup.html['lang']     = 'sr-Latn'
        # NOTE(review): mtag is inserted as a plain string rather than a
        # parsed tag - presumably the soup emits it verbatim; confirm.
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(name='img',align=True):
            del item['align']
            # NOTE(review): the break string goes *inside* the img element,
            # not before it - looks intentional as a layout hack; confirm.
            item.insert(0,'<br /><br />')
        return soup
    language = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_blic.py
+++ b/src/calibre/web/feeds/recipes/recipe_blic.py
@ -5,32 +5,49 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blic.rs
 '''
-import string,re
+
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Blic(BasicNewsRecipe):
    title                 = u'Blic'
-    __author__            = 'Darko Miletic'
+    __author__            = u'Darko Miletic'
-    description           = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
+    description           = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'    
-    oldest_article        = 7
+    publisher             = 'RINGIER d.o.o.'
-    language              = _('Serbian')
+    category              = 'news, politics, Serbia'
    oldest_article        = 2
    max_articles_per_feed = 100
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
-    cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Serbia'
+                        , '--publisher', publisher
                        , '--publisher', 'Blic'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags     = [dict(name='div', attrs={'class':'single_news'})]
    feeds              = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
    remove_tags        = [dict(name=['object','link'])]
    def print_version(self, url):
        '''
        Build the ``_print.php`` URL for an article.

        Everything after the first ``?`` in ``url`` is appended to the
        print-page prefix. If ``url`` contains no ``?``, ``partition``
        yields an empty remainder and the bare prefix is returned.
        '''
        # Only rest_url is used; start_url and question are discarded.
        start_url, question, rest_url = url.partition('?')
        return u'http://www.blic.rs/_print.php?' + rest_url
    def preprocess_html(self, soup):
        '''
        Insert a Content-Language (sr-Latn) meta string at position 0 of
        ``soup.head`` and delete the ``style`` attribute of every element
        returned by ``soup.findAll(style=True)``.

        Returns the same ``soup`` object that was passed in.
        '''
        # NOTE(review): mtag is inserted as a plain string rather than a
        # parsed tag - presumably the soup emits it verbatim; confirm.
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language              = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_clarin.py
+++ b/src/calibre/web/feeds/recipes/recipe_clarin.py
@ -1,32 +1,36 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 clarin.com
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Clarin(BasicNewsRecipe):
    title                 = 'Clarin'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina y mundo'
    publisher             = 'Grupo Clarin'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    language = _('Spanish')
    use_embedded_content  = False
    no_stylesheets        = True
    cover_url             = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Argentina'
+                        , '--publisher', publisher
                        , '--publisher', 'Grupo Clarin'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    remove_tags = [
                     dict(name='a'   , attrs={'class':'Imp'   })
                    ,dict(name='div' , attrs={'class':'Perma' })
@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe):
        rest  = artl.partition('-0')[-1]
        lmain = rest.partition('.')[0]
        return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
    def preprocess_html(self, soup):
        '''
        Insert a Content-Language (es-AR) meta string at position 0 of
        ``soup.head`` and delete the ``style`` attribute of every element
        returned by ``soup.findAll(style=True)``.

        Returns the same ``soup`` object that was passed in.
        '''
        # NOTE(review): mtag is inserted as a plain string rather than a
        # parsed tag - presumably the soup emits it verbatim; confirm.
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_danas.py
+++ b/src/calibre/web/feeds/recipes/recipe_danas.py
@ -5,38 +5,47 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 danas.rs
 '''
-import string,re
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Danas(BasicNewsRecipe):
-    title                 = 'Danas'
+    title                 = u'Danas'
    __author__            = 'Darko Miletic'
-    description           = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'    
+    description           = 'Vesti'
    publisher             = 'Danas d.o.o.'
    category              = 'news, politics, Serbia'    
    oldest_article        = 2
    language              = _('Serbian')
    max_articles_per_feed = 100
-    no_stylesheets        = True
+    no_stylesheets        = False
    remove_javascript     = True
    use_embedded_content  = False
-    cover_url = 'http://www.danas.rs/images/basic/danas.gif'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Serbia'
+                        , '--publisher', publisher
                        , '--publisher', 'Danas'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags     = [dict(name='div', attrs={'id':'left'})]
    remove_tags = [
-                     dict(name='div', attrs={'class':'width_1_4'  })
+                     dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
                    ,dict(name='div', attrs={'class':'metaClanka' })
                    ,dict(name='div', attrs={'id':'comments'})
-                    ,dict(name='div', attrs={'class':'baner'      })
+                    ,dict(name=['object','link'])
                    ,dict(name='div', attrs={'class':'slikaClanka'})                    
                  ]
    feeds          = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
-    def print_version(self, url):
+    def preprocess_html(self, soup):
-        return url + '&action=print'
+        mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language              = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_el_mercurio_chile.py
+++ b/src/calibre/web/feeds/recipes/recipe_el_mercurio_chile.py
@ -5,32 +5,37 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 emol.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class ElMercurio(BasicNewsRecipe):
    title                 = 'El Mercurio online'
    language = _('Spanish')
    __author__            = 'Darko Miletic'
    description           = 'El sitio de noticias online de Chile'
    publisher             = 'El Mercurio'
    category              = 'news, politics, Chile'        
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
    remove_javascript     = True
    use_embedded_content  = False
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Chile'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [  
                        dict(name='div', attrs={'class':'despliegue-txt_750px'})
                       ,dict(name='div', attrs={'id':'div_cuerpo_participa'})
                     ]
    remove_tags = [  
                     dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
                    ,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
@ -46,3 +51,11 @@ class ElMercurio(BasicNewsRecipe):
              ,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
            ]
    def preprocess_html(self, soup):
        '''
        Insert a Content-Language (es-CL) meta string at position 0 of
        ``soup.head`` and delete the ``style`` attribute of every element
        returned by ``soup.findAll(style=True)``.

        Returns the same ``soup`` object that was passed in.
        '''
        # NOTE(review): mtag is inserted as a plain string rather than a
        # parsed tag - presumably the soup emits it verbatim; confirm.
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_elargentino.py
+++ b/src/calibre/web/feeds/recipes/recipe_elargentino.py
@ -1,7 +1,7 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 elargentino.com
 '''
@ -12,9 +12,11 @@ class ElArgentino(BasicNewsRecipe):
    title                 = 'ElArgentino.com'
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
-    language = _('Spanish')
+    publisher             = 'ElArgentino.com'
    category              = 'news, politics, Argentina'    
    oldest_article        = 2
    max_articles_per_feed = 100
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
@ -22,10 +24,12 @@ class ElArgentino(BasicNewsRecipe):
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Argentina'
+                        , '--category', category
-                        , '--publisher'     , 'ElArgentino.com'
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    remove_tags = [
                     dict(name='div', attrs={'id':'noprint'              })
                    ,dict(name='div', attrs={'class':'encabezadoImprimir'})
@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe):
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
        soup.head.insert(0,mtag)
-        soup.prettify()
+        for item in soup.findAll(style=True):
            del item['style']        
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_elmundo.py
+++ b/src/calibre/web/feeds/recipes/recipe_elmundo.py
@ -12,35 +12,49 @@ class ElMundo(BasicNewsRecipe):
    title                 = 'El Mundo'
    __author__            = 'Darko Miletic'
    description           = 'News from Spain'
-    language = _('Spanish')
+    publisher             = 'El Mundo'
    category              = 'news, politics, Spain'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'iso8859_15'
    cover_url             = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Spain'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
-    keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [
                        dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
                       ,dict(name='div', attrs={'class':['contenido_noticia_01']})
                     ]
    remove_tags = [
-                     dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']})
+                     dict(name='div', attrs={'class':['herramientas','publicidad_google']})
                    ,dict(name='div', attrs={'id':'modulo_multimedia' })
                    ,dict(name=['object','script','link', 'a'])
                    ,dict(name='ul', attrs={'class':'herramientas' })                                         
                    ,dict(name=['object','link'])
                  ]
    feeds = [ 
              (u'Portada'         , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
             ,(u'Television'      , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
             ,(u'Espana'          , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
             ,(u'Internacional'   , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
             ,(u'Cultura'         , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
             ,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
             ,(u'Comunicacion'    , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
             ,(u'Television'      , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
            ]
    def preprocess_html(self, soup):
        '''
        Delete the ``style`` attribute of every element returned by
        ``soup.findAll(style=True)`` and return the same ``soup`` object.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_estadao.py
+++ b/src/calibre/web/feeds/recipes/recipe_estadao.py
@ -8,25 +8,28 @@ estadao.com.br
 from calibre.web.feeds.news import BasicNewsRecipe
 class Estadao(BasicNewsRecipe):
    title                 = 'O Estado de S. Paulo'
    __author__            = 'Darko Miletic'
-    description           = 'News from Brasil'
+    description           = 'News from Brasil in Portugese'
-    language = _('Portugese')    
+    publisher             = 'O Estado de S. Paulo'
    category              = 'news, politics, Brasil'    
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    cover_url             = 'http://www.estadao.com.br/img/logo_estadao.png'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Brasil'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
    remove_tags = [
@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe):
        ifr = soup.find('iframe')
        if ifr:
           ifr.extract()
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Portugese')    
--- a/src/calibre/web/feeds/recipes/recipe_granma.py
+++ b/src/calibre/web/feeds/recipes/recipe_granma.py
@ -7,37 +7,46 @@ granma.cubaweb.cu
 '''
 import urllib
 from calibre.web.feeds.news import BasicNewsRecipe
 class Granma(BasicNewsRecipe):
    title                 = 'Diario Granma'
    __author__            = 'Darko Miletic'
    language = _('Spanish')
    description           = 'Organo oficial del Comite Central del Partido Comunista de Cuba'    
    publisher             = 'Granma'
    category              = 'news, politics, Cuba'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Cuba'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='table', attrs={'height':'466'})]
    feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
    def preprocess_html(self, soup):
-        del soup.body.table['style']
+        mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
-        rtag = soup.find('td', attrs={'height':'458'})
+        soup.head.insert(0,mtag)
-        if rtag:
+        for item in soup.findAll('table'):
-            del rtag['style']
+            if item.has_key('width'):
               del item['width']
            if item.has_key('height'):
               del item['height']            
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_harpers_full.py
+++ b/src/calibre/web/feeds/recipes/recipe_harpers_full.py
@ -1,7 +1,7 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 harpers.org - paid subscription/ printed issue articles
 This recipe only gets articles published in text format;
@ -9,13 +9,15 @@ images and pdf's are ignored
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Harpers_full(BasicNewsRecipe):
    title                 = u"Harper's Magazine - articles from printed edition"
    __author__            = u'Darko Miletic'
    description           = u"Harper's Magazine: Founded June 1850."
-    language = _('English')
+    publisher             = "Harpers's"
    category              = 'news, politics, USA'    
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
@ -26,6 +28,15 @@ class Harpers_full(BasicNewsRecipe):
    INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
    LOGIN = 'http://www.harpers.org'
    cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
@ -60,3 +71,10 @@ class Harpers_full(BasicNewsRecipe):
                                 ,'description':''
                                })
        return [(soup.head.title.string, articles)]
    def preprocess_html(self, soup):
        '''
        Delete the ``style`` attribute of every element returned by
        ``soup.findAll(style=True)`` and return the same ``soup`` object.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('English')
--- a/src/calibre/web/feeds/recipes/recipe_infobae.py
+++ b/src/calibre/web/feeds/recipes/recipe_infobae.py
@ -1,7 +1,7 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 infobae.com
 '''
@ -12,21 +12,23 @@ class Infobae(BasicNewsRecipe):
    title                 = 'Infobae.com'
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
    publisher             = 'Infobae.com'
    category              = 'news, politics, Argentina'     
    oldest_article        = 2
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'iso-8859-1'
    cover_url             = 'http://www.infobae.com/imgs/header/header.gif'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Argentina'
+                        , '--category', category
-                        , '--publisher'     , 'Infobae.com'
+                        , '--publisher', publisher
                        ]
-
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    feeds = [ 
              (u'Noticias'  , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml'       )
@ -39,3 +41,12 @@ class Infobae(BasicNewsRecipe):
        main, sep, article_part = url.partition('contenidos/')
        article_id, rsep, rrest = article_part.partition('-')    
        return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
    def preprocess_html(self, soup):
        '''
        Insert one string holding two meta elements (Content-Type with a
        utf-8 charset, and Content-Language es-AR) at position 0 of
        ``soup.head``, then delete the ``style`` attribute of every element
        returned by ``soup.findAll(style=True)``.

        Returns the same ``soup`` object that was passed in.
        '''
        # NOTE(review): mtag is inserted as a plain string rather than as
        # parsed tags - presumably the soup emits it verbatim; confirm.
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_jb_online.py
+++ b/src/calibre/web/feeds/recipes/recipe_jb_online.py
@ -12,20 +12,24 @@ class JBOnline(BasicNewsRecipe):
    title                 = 'Jornal Brasileiro Online'
    __author__            = 'Darko Miletic'
    description           = 'News from Brasil'
    publisher             = 'Jornal Brasileiro'
    category              = 'news, politics, Brasil'    
    oldest_article        = 2
    language = _('Portugese')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://jbonline.terra.com.br/img/logo_01.gif'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Brasil'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
    remove_tags = [dict(name=['script','object','form'])]
@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe):
        ifr = soup.find('iframe')
        if ifr:
           ifr.extract()
-        item = soup.find('div', attrs={'id':'corpoNoticia'})
+        for item in soup.findAll(style=True):
        if item:
            del item['style']           
        return soup
    language = _('Portugese')
--- a/src/calibre/web/feeds/recipes/recipe_jutarnji.py
+++ b/src/calibre/web/feeds/recipes/recipe_jutarnji.py
@ -6,28 +6,35 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 jutarnji.hr
 '''
-import string, re
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class Jutarnji(BasicNewsRecipe):
-    title                 = 'Jutarnji'
+    title                 = u'Jutarnji'
-    __author__            = 'Darko Miletic'
+    __author__            = u'Darko Miletic'
-    description           = 'Online izdanje Jutarnjeg lista'
+    description           = u'Hrvatski portal'
    publisher             = 'Jutarnji.hr'
    category              = 'news, politics, Croatia'    
    oldest_article        = 2
    max_articles_per_feed = 100
    simultaneous_downloads = 1
    delay = 1    
    no_stylesheets        = True
    use_embedded_content  = False
    remove_javascript     = True
    encoding              = 'cp1250'
-    cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Croatia'
+                        , '--publisher', publisher
                        , '--publisher', 'Europapress holding d.o.o.'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    remove_tags = [ 
@ -49,11 +56,16 @@ class Jutarnji(BasicNewsRecipe):
    def print_version(self, url):
        main, split, rest = url.partition('.jl')
        rmain, rsplit, rrest = main.rpartition(',')
-        return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
+        return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
-        soup.prettify()
+        mtag = '<meta http-equiv="Content-Language" content="hr"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']        
        for item in soup.findAll(width=True):
            del item['width']        
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py
+++ b/src/calibre/web/feeds/recipes/recipe_juventudrebelde.py
@ -13,21 +13,25 @@ class Juventudrebelde(BasicNewsRecipe):
    title                 = 'Juventud Rebelde'
    __author__            = 'Darko Miletic'
    description           = 'Diario de la Juventud Cubana'
    publisher             = 'Juventud rebelde'
    category              = 'news, politics, Cuba'    
    oldest_article        = 2
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Cuba'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
    feeds = [
@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe):
              ,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
            ]
    def preprocess_html(self, soup):
        """Insert a Content-Language meta string and strip inline styles.

        The ``es-CU`` meta markup is inserted at position 0 of ``soup.head``
        and the ``style`` attribute is deleted from every element that has
        one. The soup is modified in place and returned.
        """
        language_meta = '<meta http-equiv="Content-Language" content="es-CU"/>'
        head = soup.head
        head.insert(0, language_meta)
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_juventudrebelde_english.py
+++ b/src/calibre/web/feeds/recipes/recipe_juventudrebelde_english.py
@ -5,7 +5,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 juventudrebelde.co.cu
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -13,22 +12,33 @@ class Juventudrebelde_english(BasicNewsRecipe):
    title                 = 'Juventud Rebelde in english'
    __author__            = 'Darko Miletic'
    description           = 'The newspaper of Cuban Youth'    
-    language = _('English')    
+    publisher             = 'Juventud Rebelde'
    category              = 'news, politics, Cuba'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'iso-8859-1'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Cuba'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'class':'read'})]
    feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
    def preprocess_html(self, soup):
        """Insert a Content-Language meta string and strip inline styles.

        This recipe fetches the English edition, so the declared content
        language is ``en``; the previous ``es-CU`` value labelled English
        articles as Spanish. The meta markup is inserted at position 0 of
        ``soup.head`` and the ``style`` attribute is deleted from every
        element that has one. The soup is modified in place and returned.
        """
        mtag = '<meta http-equiv="Content-Language" content="en"/>'
        soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = _('English')    
--- a/src/calibre/web/feeds/recipes/recipe_la_cuarta.py
+++ b/src/calibre/web/feeds/recipes/recipe_la_cuarta.py
@ -11,25 +11,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaCuarta(BasicNewsRecipe):
    title                 = 'La Cuarta'
    __author__            = 'Darko Miletic'
-    description           = 'El sitio de noticias online de Chile'    
+    description           = 'La Cuarta Cibernetica: El Diario popular'
    publisher             = 'CODISA, Consorcio Digital S.A.'
    category              = 'news, politics, entertainment, Chile'
    oldest_article        = 2
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Chile'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
    remove_tags = [  
-                     dict(name='script')
+                     dict(name='ul')
                    ,dict(name='ul')
                    ,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
                    ,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
                    ,dict(name='input')
@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe):
                    ,dict(name='strong', text='PUBLICIDAD')
                  ]
    def preprocess_html(self, soup):
        """Insert the ``es-CL`` Content-Language meta string and strip inline styles.

        The meta markup goes in at position 0 of ``soup.head``; afterwards
        the ``style`` attribute is deleted from each element that has one.
        Returns the same soup object.
        """
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="es-CL"/>')
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
    feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_la_segunda.py
+++ b/src/calibre/web/feeds/recipes/recipe_la_segunda.py
@ -12,21 +12,24 @@ class LaSegunda(BasicNewsRecipe):
    title                 = 'La Segunda'
    __author__            = 'Darko Miletic'
    description           = 'El sitio de noticias online de Chile' 
-    language = _('Spanish')    
+    publisher             = 'La Segunda'
    category              = 'news, politics, Chile'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Chile'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='table')]
    feeds = [ 
@ -46,3 +49,13 @@ class LaSegunda(BasicNewsRecipe):
        rest, sep, article_id = url.partition('index.asp?idnoticia=')        
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
    def preprocess_html(self, soup):
        """Insert the ``es-CL`` Content-Language meta string and strip layout attributes.

        After the meta markup is inserted at position 0 of ``soup.head``,
        the ``width`` attribute is deleted from every ``table`` that has
        one, and then the ``style`` attribute from every element that has
        one. Returns the same soup object.
        """
        language_meta = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0, language_meta)
        sized_tables = soup.findAll(name='table', width=True)
        for table in sized_tables:
            del table['width']
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
    language = _('Spanish')    
--- a/src/calibre/web/feeds/recipes/recipe_la_tercera.py
+++ b/src/calibre/web/feeds/recipes/recipe_la_tercera.py
@ -12,20 +12,24 @@ class LaTercera(BasicNewsRecipe):
    title                 = 'La Tercera'
    __author__            = 'Darko Miletic'
    description           = 'El sitio de noticias online de Chile'
    publisher             = 'La Tercera'
    category              = 'news, politics, Chile'
    oldest_article        = 2
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
    use_embedded_content  = False
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Chile'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
-    keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ]
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
    remove_tags = [  
                     dict(name='script')
@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe):
              ,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
            ]
    def preprocess_html(self, soup):
        """Insert the ``es-CL`` Content-Language meta string and strip inline styles.

        The soup is changed in place: the meta markup is inserted at
        position 0 of ``soup.head`` and each element that has a ``style``
        attribute loses it. The same soup is returned.
        """
        content_language = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0, content_language)
        for element in soup.findAll(style=True):
            del element['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_lanacion.py
+++ b/src/calibre/web/feeds/recipes/recipe_lanacion.py
@ -1,7 +1,7 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lanacion.com.ar
 '''
@ -11,20 +11,23 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Lanacion(BasicNewsRecipe):
    title                 = 'La Nacion'
    __author__            = 'Darko Miletic'
-    description           = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!'    
+    description           = 'Noticias de Argentina y el resto del mundo'
    publisher             = 'La Nacion'
    category              = 'news, politics, Argentina'    
    oldest_article        = 2
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    remove_javascript     = True
    no_stylesheets        = True
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Argentina'
+                        , '--publisher', publisher
                        , '--publisher', 'La Nacion SA'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
    remove_tags = [
                     dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe):
                        ,(u'Revista'              , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
                     ]
-    def get_cover_url(self):
+    def preprocess_html(self, soup):
-        index = 'http://www.lanacion.com.ar'
+        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
-        cover_url = None
+        soup.head.insert(0,mtag)    
-        soup = self.index_to_soup(index)
+        for item in soup.findAll(style=True):
-        cover_item = soup.find('img',attrs={'class':'logo'})
+            del item['style']
-        if cover_item:
+        return soup
-           cover_url = index + cover_item['src']
+
-        return cover_url
+    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_lanacion_chile.py
+++ b/src/calibre/web/feeds/recipes/recipe_lanacion_chile.py
@ -13,20 +13,24 @@ class LaNacionChile(BasicNewsRecipe):
    title                 = 'La Nacion Chile'
    __author__            = 'Darko Miletic'
    description           = 'El sitio de noticias online de Chile'
    publisher             = 'La Nacion'
    category              = 'news, politics, Chile'
    oldest_article        = 2
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Chile'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
    feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe):
        item = soup.find('a', attrs={'href':'javascript:window.close()'})
        if item:
           item.extract()
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0,mtag)    
        for item in soup.findAll(style=True):
            del item['style']           
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_laprensa.py
+++ b/src/calibre/web/feeds/recipes/recipe_laprensa.py
@ -1,7 +1,7 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 laprensa.com.ar
 '''
@ -13,20 +13,24 @@ class LaPrensa(BasicNewsRecipe):
    title                 = 'La Prensa'
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
    publisher             = 'La Prensa'
    category              = 'news, politics, Argentina'    
    oldest_article        = 7
    language = _('Spanish')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.laprensa.com.ar/imgs/logo.gif'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Argentina'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    feeds = [ 
              (u'Politica'    , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
             ,(u'Economia'    , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe):
    def preprocess_html(self, soup):
        """Drop the body ``onload`` attribute, add a language meta string, strip styles.

        Steps, in order: delete ``onload`` from ``soup.body``; insert the
        ``es-AR`` Content-Language meta markup at position 0 of
        ``soup.head``; delete the ``style`` attribute from every element
        that has one. Returns the same soup object.
        """
        page_body = soup.body
        del page_body['onload']
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="es-AR"/>')
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
    language = _('Spanish')
--- a/src/calibre/web/feeds/recipes/recipe_nin.py
+++ b/src/calibre/web/feeds/recipes/recipe_nin.py
@ -7,15 +7,17 @@ nin.co.yu
 '''
 import re, urllib
 from calibre.web.feeds.news import BasicNewsRecipe
 class Nin(BasicNewsRecipe):    
    title                  = 'NIN online'
    __author__             = 'Darko Miletic'
    description            = 'Nedeljne informativne novine'
    publisher              = 'NIN'
    category               = 'news, politics, Serbia'    
    no_stylesheets         = True
    oldest_article         = 15
    language              = _('Serbian')
    simultaneous_downloads = 1
    delay                  = 1
    encoding               = 'utf8'
@ -23,12 +25,18 @@ class Nin(BasicNewsRecipe):
    PREFIX                 = 'http://www.nin.co.yu'
    INDEX                  = PREFIX + '/?change_lang=ls'
    LOGIN                  = PREFIX + '/?logout=true'
    remove_javascript      = True
    use_embedded_content   = False
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, politics, Serbia'
+                        , '--category', category
-                        , '--publisher'     , 'NIN'
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    def get_browser(self):
@ -54,3 +62,12 @@ class Nin(BasicNewsRecipe):
        if link_item:
           cover_url = self.PREFIX + link_item['src']
        return cover_url
    def preprocess_html(self, soup):
        """Insert the ``sr-Latn-RS`` Content-Language meta string and strip inline styles.

        The meta markup is inserted at position 0 of ``soup.head``; the
        ``style`` attribute is then deleted from every element that has
        one. The soup is modified in place and returned.
        """
        language_meta = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
        soup.head.insert(0, language_meta)
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
    language              = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_novosti.py
+++ b/src/calibre/web/feeds/recipes/recipe_novosti.py
@ -5,32 +5,45 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 novosti.rs
 '''
-import string,re
+
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Novosti(BasicNewsRecipe):
-    title                 = 'Vecernje Novosti'
+    title                 = u'Vecernje Novosti'
-    __author__            = 'Darko Miletic'
+    __author__            = u'Darko Miletic'
-    description           = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti'
+    description           = u'Vesti'
    publisher             = 'Kompanija Novosti'
    category              = 'news, politics, Serbia'        
    oldest_article        = 2
    language              = _('Serbian')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    remove_javascript     = True
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Serbia'
+                        , '--publisher', publisher
                        , '--publisher', 'Novosti AD'
                        ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags     = [dict(name='div', attrs={'class':'jednaVest'})]
-    remove_tags_after  = dict(name='div', attrs={'class':'info_bottom'})
+    remove_tags        = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})]
    remove_tags = [
                     dict(name='div', attrs={'class':'info'})
                    ,dict(name='div', attrs={'class':'info_bottom'})
                  ]
    feeds              = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
    def preprocess_html(self, soup):
        """Insert the ``sr-Latn`` Content-Language meta string and strip inline styles.

        Works in place on the soup: the meta markup is inserted at position
        0 of ``soup.head`` and every element with a ``style`` attribute has
        it deleted. Returns the same soup object.
        """
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="sr-Latn"/>')
        for element in soup.findAll(style=True):
            del element['style']
        return soup
    language              = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_nspm.py
+++ b/src/calibre/web/feeds/recipes/recipe_nspm.py
@ -6,28 +6,36 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 nspm.rs
 '''
-import string,re
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Nspm(BasicNewsRecipe):
    title                 = u'Nova srpska politicka misao'
    __author__            = 'Darko Miletic'
    description           = 'Casopis za politicku teoriju i drustvena istrazivanja'    
    publisher             = 'NSPM'
    category              = 'news, politics, Serbia'    
    oldest_article        = 7
    language = _('Serbian')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    INDEX                 = 'http://www.nspm.rs/?alphabet=l'
-    cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
+    encoding              = 'utf8'
    remove_javascript     = True
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, politics, Serbia'
+                        , '--publisher', publisher
-                        , '--publisher', 'IIC NSPM'
+                        , '--ignore-tables'
                        ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    remove_tags        = [dict(name='a')]
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -38,3 +46,15 @@ class Nspm(BasicNewsRecipe):
    def print_version(self, url):
        """Return *url* with each ``.html`` replaced by ``/stampa.html``."""
        article_suffix = '.html'
        print_suffix = '/stampa.html'
        return url.replace(article_suffix, print_suffix)
    def preprocess_html(self, soup):
        """Set the page language to ``sr-Latn-RS`` and strip inline styles.

        Both ``xml:lang`` and ``lang`` on ``soup.html`` are set. If a meta
        element with ``http-equiv="Content-Language"`` is found, its
        ``content`` is overwritten as well. The ``style`` attribute is then
        deleted from every element that has one. Returns the same soup.
        """
        locale = 'sr-Latn-RS'
        for attribute in ('xml:lang', 'lang'):
            soup.html[attribute] = locale
        language_meta = soup.find('meta', attrs={'http-equiv': 'Content-Language'})
        if language_meta:
            language_meta['content'] = locale
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
    language = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_o_globo.py
+++ b/src/calibre/web/feeds/recipes/recipe_o_globo.py
@ -12,20 +12,24 @@ class OGlobo(BasicNewsRecipe):
    title                 = 'O Globo'
    __author__            = 'Darko Miletic'
    description           = 'News from Brasil'
    publisher             = 'O Globo'
    category              = 'news, politics, Brasil'
    oldest_article        = 2
    max_articles_per_feed = 100
    language = _('Portugese')
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://oglobo.globo.com/_img/o-globo.png'
    remove_javascript     = True
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Brasil'
+                        , '--category', category
-                        , '--publisher'     , title
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
    remove_tags = [  
@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe):
              ,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
              ,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
            ]
    def preprocess_html(self, soup):
        """Delete the ``style`` attribute from every element that has one.

        The soup is modified in place and returned.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
    language = _('Portugese')
--- a/src/calibre/web/feeds/recipes/recipe_pagina12.py
+++ b/src/calibre/web/feeds/recipes/recipe_pagina12.py
@ -1,32 +1,37 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 pagina12.com.ar
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Pagina12(BasicNewsRecipe):
    title                 = u'Pagina/12'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina y el resto del mundo'
-    language = _('Spanish')    
+    publisher             = 'La Pagina S.A.'
    category              = 'news, politics, Argentina'    
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
    remove_javascript     = True
    use_embedded_content  = False
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Argentina'
+                        , '--category', category
-                        , '--publisher'     , 'La Pagina S.A.'
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    remove_tags = [
                     dict(name='div', attrs={'id':'volver'})
@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe):
    def print_version(self, url):
        """Return *url* with the site root replaced by its ``/imprimir/`` root."""
        site_root = 'http://www.pagina12.com.ar/'
        print_root = 'http://www.pagina12.com.ar/imprimir/'
        return url.replace(site_root, print_root)
    def preprocess_html(self, soup):
        """Insert the ``es-AR`` Content-Language meta string and strip inline styles.

        The meta markup is inserted at position 0 of ``soup.head`` and the
        ``style`` attribute is deleted from every element that has one.
        The soup is modified in place and returned.
        """
        content_language = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0, content_language)
        for element in soup.findAll(style=True):
            del element['style']
        return soup
    language = _('Spanish')    
--- a/src/calibre/web/feeds/recipes/recipe_pescanik.py
+++ b/src/calibre/web/feeds/recipes/recipe_pescanik.py
@ -6,31 +6,53 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 pescanik.net
 '''
-import string,re
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Pescanik(BasicNewsRecipe):
    title                 = 'Pescanik'
    __author__            = 'Darko Miletic'
    description           = 'Pescanik'
    publisher             = 'Pescanik'
    category              = 'news, politics, Serbia'    
    oldest_article        = 7
    language              = _('Serbian')
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    html2lrf_options = ['--base-font-size', '10']
+    remove_javascript     = True
-    html2epub_options = 'base_font_size = "10pt"'
+    encoding              = 'utf8'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-    remove_tags_after = dict(name='div', attrs={'class':'article_seperator'})
+    remove_tags = [
-    
+                     dict(name='td'  , attrs={'class':'buttonheading'})
-    remove_tags = [dict(name='td'  , attrs={'class':'buttonheading'})]
+                    ,dict(name='span', attrs={'class':'article_seperator'})
                    ,dict(name=['object','link'])
                  ]
    feeds       = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
    def print_version(self, url):
-        nurl = url.replace('http://pescanik.net/index.php','http://pescanik.net/index2.php')        
+        nurl = url.replace('/index.php','/index2.php')        
        return nurl + '&pop=1&page=0'
    def preprocess_html(self, soup):
        """Insert the ``sr-Latn-RS`` Content-Language meta string and strip inline styles.

        Operates in place: the meta markup is inserted at position 0 of
        ``soup.head``, then each element that has a ``style`` attribute
        loses it. Returns the same soup object.
        """
        head = soup.head
        head.insert(0, '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>')
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
    language              = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_politika.py
+++ b/src/calibre/web/feeds/recipes/recipe_politika.py
@ -5,37 +5,61 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 politika.rs
 '''
-import string,re
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Politika(BasicNewsRecipe):
-    title                 = 'Politika Online'
+    title                 = u'Politika Online'
    __author__            = 'Darko Miletic'
    description           = 'Najstariji dnevni list na Balkanu'
    publisher             = 'Politika novine i Magazini d.o.o'
    category              = 'news, politics, Serbia'            
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    extra_css             = '.content_center_border {text-align: left;}' 
    use_embedded_content  = False
-    cover_url = 'http://www.politika.rs:8080/images/politika.gif'
+    remove_javascript     = True
    encoding              = 'utf8'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, Serbia'
+                        , '--publisher', publisher
                        , '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-    keep_only_tags     = [ dict(name='div', attrs={'class':'contentcenter'}) ] 
+    keep_only_tags     = [dict(name='div', attrs={'class':'content_center_border'})]
-    remove_tags_after  = dict(name='div', attrs={'class':'datum_item_details'})
+
    remove_tags = [ 
                    dict(name='div', attrs={'class':['send_print','txt-komentar']})
                   ,dict(name=['object','link','a'])
                   ,dict(name='h1', attrs={'class':'box_header-tags'})                   
                  ]
    feeds          = [  
                         (u'Politika'             , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml'             )
                        ,(u'Svet'                 , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml'                 )
                        ,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
                        ,(u'Pogledi'              , u'http://www.politika.rs/pogledi/index.lt.xml'                        )
                        ,(u'Pogledi sa strane'    , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml'    )
                        ,(u'Tema dana'            , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml'            )
                        ,(u'Kultura'              , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml'              )
                        ,(u'Zivot i stil'         , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml'         )                        
                     ]
    def preprocess_html(self, soup):
        '''
        Add a Content-Language declaration, remove inline styles and
        left-align the main content container.

        The sr-Latn-RS meta markup is inserted at position 0 of <head>,
        the ``style`` attribute is deleted from every element that has
        one, and the first div with class ``content_center_border`` (if
        found) gets ``align="left"``. The same soup object is returned.
        '''
        language_meta = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
        soup.head.insert(0, language_meta)
        # style=True matches any element that carries a style attribute
        for styled in soup.findAll(style=True):
            del styled['style']
        content_box = soup.find('div', attrs={'class':'content_center_border'})
        if not content_box:
            # no matching container: nothing to align
            return soup
        content_box['align'] = 'left'
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_vijesti.py
+++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py
@ -1,13 +1,13 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 vijesti.cg.yu
 '''
-import string,re
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
@ -15,24 +15,35 @@ class Vijesti(BasicNewsRecipe):
    title                 = 'Vijesti'
    __author__            = 'Darko Miletic'
    description           = 'News from Montenegro'
-    oldest_article        = 2
+    publisher             = 'Daily Press Vijesti'
-    language              = _('Serbian')
+    category              = 'news, politics, Montenegro'    
    oldest_article        = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
-    use_embedded_content  = False
+    remove_javascript     = True
    encoding              = 'cp1250'
    cover_url             = 'http://www.vijesti.cg.yu/img/logo.gif'
-
+    remove_javascript     = True
-    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+    use_embedded_content  = False
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--category'      , 'news, Montenegro'
+                        , '--category', category
-                        , '--publisher'     , 'Daily Press Vijesti'
+                        , '--publisher', publisher
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
    remove_tags = [
                     dict(name='div', attrs={'align':'right'})
                    ,dict(name=['object','link'])
                  ]
    feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
    def preprocess_html(self, soup):
@ -40,4 +51,10 @@ class Vijesti(BasicNewsRecipe):
        soup.html['lang']     = 'sr-Latn-ME'
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll('img'):
            if item.has_key('align'):
               del item['align']
               item.insert(0,'<br /><br />')
        return soup
    language              = _('Serbian')
--- a/src/calibre/web/feeds/recipes/recipe_vreme.py
+++ b/src/calibre/web/feeds/recipes/recipe_vreme.py
@ -6,27 +6,34 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 vreme.com
 '''
-import string,re
+import re
 from calibre import strftime
-from calibre.web.feeds.recipes import BasicNewsRecipe
+
 from calibre.web.feeds.news import BasicNewsRecipe
 class Vreme(BasicNewsRecipe):    
    title          = 'Vreme'
    __author__     = 'Darko Miletic'
    description    = 'Politicki Nedeljnik Srbije'
    publisher      = 'Vreme d.o.o.'
    category       = 'news, politics, Serbia'    
    no_stylesheets = True
-    language              = _('Serbian')
+    remove_javascript  = True
    needs_subscription = True    
    INDEX = 'http://www.vreme.com'
    LOGIN = 'http://www.vreme.com/account/index.php'
    remove_javascript     = True
    use_embedded_content  = False
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
    html2lrf_options = [
                          '--comment', description
-                        , '--base-font-size', '10'
+                        , '--category', category
-                        , '--category', 'news, politics, Serbia'
+                        , '--publisher', publisher
                        , '--publisher', 'Vreme d.o.o.'
                        ]
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    def get_browser(self):
@ -68,9 +75,28 @@ class Vreme(BasicNewsRecipe):
                                })
        return [(soup.head.title.string, articles)]
    remove_tags = [
                    dict(name=['object','link'])
                   ,dict(name='table',attrs={'xclass':'image'})
                  ]
    def print_version(self, url):
        '''
        Return ``url`` with the ``&print=yes`` query suffix appended.
        '''
        print_suffix = '&print=yes'
        return url + print_suffix
    def preprocess_html(self, soup):
        '''
        Clean up <body>, declare the content language and hoist the
        first table cell to the top of the body.

        Steps, in order:
          * delete the ``text``, ``bgcolor`` and ``onload`` attributes
            of <body>;
          * insert the sr-Latn Content-Language meta markup at position
            0 of <head>;
          * if the document contains a <td>, detach the first one,
            detach the first table under <body>, and re-insert the
            detached cell at position 0 of <body>.

        The same soup object is returned.
        '''
        for attr_name in ('text', 'bgcolor', 'onload'):
            del soup.body[attr_name]
        language_meta = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
        soup.head.insert(0, language_meta)
        # Look both nodes up before anything is detached from the tree
        outer_table = soup.body.table
        first_cell = soup.find('td')
        if first_cell:
            # Detach the cell first so it survives removal of the table
            first_cell.extract()
            outer_table.extract()
            soup.body.insert(0, first_cell)
        return soup
    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
@ -78,3 +104,5 @@ class Vreme(BasicNewsRecipe):
        if cover_item:
           cover_url = self.INDEX + cover_item['src']
        return cover_url
    language              = _('Serbian')