Sync to trunk.

2025-08-30 23:00:21 -04:00 · 2011-01-31 18:56:29 -05:00 · 2011-01-31 18:56:29 -05:00 · 54e7ba109d
commit 54e7ba109d
parent 76837bbd7e 5849b45d11
75 changed files with 973 additions and 556 deletions
--- a/resources/recipes/180.recipe
+++ b/resources/recipes/180.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = '180.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/7dias.recipe
+++ b/resources/recipes/7dias.recipe
@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/ambito.recipe
+++ b/resources/recipes/ambito.recipe
@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
            del item['style']
        return soup
-    language = 'es'
+    language = 'es_AR'
--- a/resources/recipes/animal_politico.recipe
+++ b/resources/recipes/animal_politico.recipe
@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
    masthead_url   = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
    oldest_article = 1
    max_articles_per_feed = 100
-    language       = 'es'
+    language       = 'es_MX'
    #feeds          = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
--- a/resources/recipes/axxon_magazine.recipe
+++ b/resources/recipes/axxon_magazine.recipe
@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'utf-8'
    publication_type      = 'magazine'
    INDEX                 = 'http://axxon.com.ar/rev/'
--- a/resources/recipes/axxon_news.recipe
+++ b/resources/recipes/axxon_news.recipe
@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
--- a/resources/recipes/bitacora.recipe
+++ b/resources/recipes/bitacora.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'bitacora.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/buenosaireseconomico.recipe
+++ b/resources/recipes/buenosaireseconomico.recipe
@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'utf8'
-    language              = 'es'
+    language              = 'es_AR'
    publication_type      = 'newspaper'
    INDEX                 = 'http://www.clarin.com'
    masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
--- a/resources/recipes/criticadigital.recipe
+++ b/resources/recipes/criticadigital.recipe
@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
    description           = 'Noticias de Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
-    language = 'es'
+    language = 'es_AR'
    no_stylesheets        = True
    use_embedded_content  = False
--- a/resources/recipes/cubadebate.recipe
+++ b/resources/recipes/cubadebate.recipe
@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Contra el Terorismo Mediatico'
    oldest_article        = 15
-    language              = 'es'
+    language              = 'es_CU'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
@ -20,8 +20,8 @@ class CubaDebate(BasicNewsRecipe):
    encoding              = 'utf-8'
    masthead_url          = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
    publication_type      = 'newsportal'
-    extra_css             = """ 
+    extra_css             = """
-                               #BlogTitle{font-size: xx-large; font-weight: bold} 
+                               #BlogTitle{font-size: xx-large; font-weight: bold}
                               body{font-family: Verdana, Arial, Tahoma, sans-serif}
                            """
@ -41,7 +41,7 @@ class CubaDebate(BasicNewsRecipe):
    feeds          = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
    remove_attributes=['width','height','lang']
-    
+
    def print_version(self, url):
        return url + 'print/'
@ -50,5 +50,5 @@ class CubaDebate(BasicNewsRecipe):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
-               item['alt'] = 'image'                
+               item['alt'] = 'image'
        return soup
--- a/resources/recipes/deutsche_welle_es.recipe
+++ b/resources/recipes/deutsche_welle_es.recipe
@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
-    language              = 'es'
+    language              = 'de_ES'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
--- a/resources/recipes/diagonales.recipe
+++ b/resources/recipes/diagonales.recipe
@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/el_mercurio_chile.recipe
+++ b/resources/recipes/el_mercurio_chile.recipe
@ -20,8 +20,8 @@ class ElMercurio(BasicNewsRecipe):
    masthead_url          = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
    remove_javascript     = True
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_CL'
-    
+
    conversion_options = {
                          'comment'   : description
@ -33,7 +33,7 @@ class ElMercurio(BasicNewsRecipe):
    keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
    remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
    remove_attributes = ['height','width']
-    
+
    feeds = [
               (u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
              ,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')
--- a/resources/recipes/el_observador.recipe
+++ b/resources/recipes/el_observador.recipe
@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
    title                 = 'Observa Digital'
    __author__            = 'yrvn'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/el_pais_uy.recipe
+++ b/resources/recipes/el_pais_uy.recipe
@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
    description           = 'Noticias de Uruguay y el resto del mundo'
    publisher             = 'EL PAIS S.A.'
    category              = 'news, politics, Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 2
--- a/resources/recipes/el_universal.recipe
+++ b/resources/recipes/el_universal.recipe
@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
    remove_javascript     = True
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
-    language              = 'es'
+    language              = 'es_MX'
    extra_css = '''
                    body{font-family:Arial,Helvetica,sans-serif}
--- a/resources/recipes/elargentino.recipe
+++ b/resources/recipes/elargentino.recipe
@ -12,7 +12,7 @@ class ElArgentino(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
    publisher             = 'ElArgentino.com'
-    category              = 'news, politics, Argentina'    
+    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    remove_javascript     = True
@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'utf8'
    cover_url             = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
-    language = 'es'
+    language = 'es_AR'
    html2lrf_options = [
@ -28,16 +28,16 @@ class ElArgentino(BasicNewsRecipe):
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
+
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    remove_tags = [
                     dict(name='div', attrs={'id':'noprint'              })
                    ,dict(name='div', attrs={'class':'encabezadoImprimir'})
                    ,dict(name='a'  , attrs={'target':'_blank'           })
                  ]
-    
+
-    feeds = [ 
+    feeds = [
              (u'Portada'     , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home'                                             )
             ,(u'Pais'        , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs'        )
             ,(u'Economia'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa'    )
@ -51,12 +51,12 @@ class ElArgentino(BasicNewsRecipe):
    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
-        article_id, rsep, rrest = article_part.partition('-')    
+        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
-            del item['style']        
+            del item['style']
        return soup
--- a/resources/recipes/elcomercio.recipe
+++ b/resources/recipes/elcomercio.recipe
@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = True
-    language              = 'es'
+    language              = 'es_EC'
    masthead_url          = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css             = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
--- a/resources/recipes/elcronista.recipe
+++ b/resources/recipes/elcronista.recipe
@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina'
    oldest_article        = 2
-    language = 'es'
+    language = 'es_AR'
    max_articles_per_feed = 100
    no_stylesheets        = True
@ -25,14 +25,14 @@ class ElCronista(BasicNewsRecipe):
                        , '--category'      , 'news, Argentina'
                        , '--publisher'     , title
                        ]
-    
+
    keep_only_tags = [
                        dict(name='table', attrs={'width':'100%'             })
                       ,dict(name='h1'   , attrs={'class':'Arialgris16normal'})
                     ]
    remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
-                     
+
    feeds = [
               (u'Economia'                , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml'             )
              ,(u'Negocios'                , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml'             )
@ -69,4 +69,4 @@ class ElCronista(BasicNewsRecipe):
        if link_item:
           cover_url = index + link_item.img['src']
        return cover_url
-        
+
--- a/resources/recipes/eltiempo_hn.recipe
+++ b/resources/recipes/eltiempo_hn.recipe
@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'
    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/eluniversal_ve.recipe
+++ b/resources/recipes/eluniversal_ve.recipe
@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
    encoding               = 'cp1252'
    publisher              = 'El Universal'
    category               = 'news, Caracas, Venezuela, world'
-    language               = 'es'
+    language               = 'es_VE'
    cover_url              = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
    conversion_options = {
--- a/resources/recipes/eluniversalimpresa.recipe
+++ b/resources/recipes/eluniversalimpresa.recipe
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class ElUniversalImpresaRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
-    language = 'es'
+    language = 'es_MX'
    version = 1
    title = u'El Universal (Edici\u00F3n Impresa)'
--- a/resources/recipes/eluniverso_ec.recipe
+++ b/resources/recipes/eluniverso_ec.recipe
@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_EC'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
--- a/resources/recipes/explosm.recipe
+++ b/resources/recipes/explosm.recipe
@ -0,0 +1,54 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class Explosm(BasicNewsRecipe):
    """Recipe for the Explosm feed that keeps only the comic entries."""
    # --- recipe metadata / fetch options ---
    title              = u'Explosm Rotated'
    __author__        = 'Andromeda Rabbit'
    description      = 'Explosm'
    language            = 'en'
    use_embedded_content = False
    no_stylesheets    = True
    oldest_article    = 24
    remove_javascript   = True
    remove_empty_feeds  = True
    max_articles_per_feed = 10
    feeds = [
             (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
             ]
    #match_regexps = [r'http://www.explosm.net/comics/.*']
    # Keep only the comic image itself; every other listed tag is stripped.
    keep_only_tags   = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
    def get_cover_url(self):
        """Return the fixed URL of the cover image."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
    def parse_feeds(self):
        """Return the parsed feeds with every non-comic article removed.

        An article is kept only when its ``url`` matches the explosm.net
        comics address; all others are dropped from the feed's ``articles``
        list.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            # Slice assignment filters the existing list object in place, so
            # any other reference to ``curfeed.articles`` sees the result.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if re.search(r'http://www.explosm.net/comics', article.url) is not None
            ]
        return feeds
    def skip_ad_pages(self, soup):
        """Return ``soup`` when the comic image is absent, else ``None``.

        NOTE(review): the original comment says this skips ad pages served
        before the actual article; confirm the return polarity against the
        ``BasicNewsRecipe.skip_ad_pages`` contract.
        """
        # Skip ad pages served before actual article
        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
        if skip_tag is None:
            return soup
        return None
--- a/resources/recipes/freeway.recipe
+++ b/resources/recipes/freeway.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'freeway.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Revista Freeway, Montevideo, Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 1
--- a/resources/recipes/granma.recipe
+++ b/resources/recipes/granma.recipe
@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
-    language = 'es'
+    language = 'es_CU'
    remove_javascript     = True
--- a/resources/recipes/ieco.recipe
+++ b/resources/recipes/ieco.recipe
@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
    encoding              = 'utf-8'
    publisher             = 'Grupo Clarin'
    category              = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
-    language              = 'es'
+    language              = 'es_AR'
    cover_url             = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
    extra_css             = ' #bd{font-family: sans-serif} '
--- a/resources/recipes/infobae.recipe
+++ b/resources/recipes/infobae.recipe
@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'cp1252'
    masthead_url          = 'http://www.infobae.com/imgs/header/header.gif'
    remove_javascript     = True
@ -25,7 +25,7 @@ class Infobae(BasicNewsRecipe):
                              body{font-family:Arial,Helvetica,sans-serif;}
                              .popUpTitulo{color:#0D4261; font-size: xx-large}
                            '''
-    
+
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
@ -33,7 +33,7 @@ class Infobae(BasicNewsRecipe):
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }
-    
+
    feeds = [
              (u'Noticias'  , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml'       )
--- a/resources/recipes/juventudrebelde.recipe
+++ b/resources/recipes/juventudrebelde.recipe
@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
-    language = 'es'
+    language = 'es_CU'
    cover_url             = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
    remove_javascript     = True
--- a/resources/recipes/la_cuarta.recipe
+++ b/resources/recipes/la_cuarta.recipe
@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
    feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
-    language = 'es'
+    language = 'es_CL'
--- a/resources/recipes/la_diaria.recipe
+++ b/resources/recipes/la_diaria.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'La Diaria'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/la_jornada.recipe
+++ b/resources/recipes/la_jornada.recipe
@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_MX'
    remove_empty_feeds    = True
    cover_url             = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
    masthead_url          = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
@ -34,8 +34,8 @@ class LaJornada_mx(BasicNewsRecipe):
                                .credito{font-weight: bold; margin-left: 1em}
                                .credito-autor{font-variant: small-caps; font-weight: bold }
                                .credito-titulo{text-align: right}
-                                .hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em } 
+                                .hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
-                                .loc{font-weight: bold} 
+                                .loc{font-weight: bold}
                                .carton{text-align: center}
                                .credit{font-weight: bold}
                                .sumario{font-weight: bold; text-align: center}
@ -56,7 +56,7 @@ class LaJornada_mx(BasicNewsRecipe):
                                       ,re.DOTALL|re.IGNORECASE)
                                       ,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
                         ]
-                        
+
    keep_only_tags = [
                         dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
                        ,dict(name='div', attrs={'id':'renderComments'})
@ -88,4 +88,4 @@ class LaJornada_mx(BasicNewsRecipe):
    def get_article_url(self, article):
        rurl = article.get('link',  None)
        return rurl.rpartition('&partner=')[0]
-        
+
--- a/resources/recipes/la_razon_bo.recipe
+++ b/resources/recipes/la_razon_bo.recipe
@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_BO'
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
--- a/resources/recipes/la_segunda.recipe
+++ b/resources/recipes/la_segunda.recipe
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaSegunda(BasicNewsRecipe):
    title                 = 'La Segunda'
    __author__            = 'Darko Miletic'
-    description           = 'El sitio de noticias online de Chile' 
+    description           = 'El sitio de noticias online de Chile'
    publisher             = 'La Segunda'
    category              = 'news, politics, Chile'
    oldest_article        = 2
@ -19,9 +19,9 @@ class LaSegunda(BasicNewsRecipe):
    encoding              = 'cp1252'
    masthead_url          = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_empty_feeds    = True
-    language              = 'es'
+    language              = 'es_CL'
-    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '            
+    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
-    
+
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
@ -29,13 +29,13 @@ class LaSegunda(BasicNewsRecipe):
                        , 'language'         : language
 						, 'linearize_tables' : True
                        }
-                        
+
    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
    remove_tags        = [dict(name='img')]
    remove_attributes  = ['width','height']
-	
+
-                        
+
-    feeds = [ 
+    feeds = [
               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
              ,(u'Politica'               , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
              ,(u'Cronica'                , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
@ -49,6 +49,6 @@ class LaSegunda(BasicNewsRecipe):
            ]
    def print_version(self, url):
-        rest, sep, article_id = url.partition('index.asp?idnoticia=')        
+        rest, sep, article_id = url.partition('index.asp?idnoticia=')
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
-    
+
--- a/resources/recipes/lamujerdemivida.recipe
+++ b/resources/recipes/lamujerdemivida.recipe
@ -11,15 +11,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaMujerDeMiVida(BasicNewsRecipe):
    title                 = 'La Mujer de mi Vida'
    __author__            = 'Darko Miletic'
-    description           = 'Cultura de otra manera'    
+    description           = 'Cultura de otra manera'
    oldest_article        = 90
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    publisher             = 'La Mujer de mi Vida'
-    category              = 'literatura, critica, arte, ensayos'    
+    category              = 'literatura, critica, arte, ensayos'
-    language = 'es'
+    language = 'es_AR'
    INDEX                 = 'http://www.lamujerdemivida.com.ar/'
    html2lrf_options = [
@ -28,8 +28,8 @@ class LaMujerDeMiVida(BasicNewsRecipe):
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
-    
+
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
    keep_only_tags = [dict(name='table', attrs={'width':'570'})]
@ -51,7 +51,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
        if cover_item:
           cover_url = self.INDEX + cover_item['src']
        return cover_url
-    
+
    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
@ -74,4 +74,4 @@ class LaMujerDeMiVida(BasicNewsRecipe):
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
-                
+
--- a/resources/recipes/lanacion.recipe
+++ b/resources/recipes/lanacion.recipe
@ -16,17 +16,17 @@ class Lanacion(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
-    language              = 'es'
+    language              = 'es_AR'
    publication_type      = 'newspaper'
-    remove_empty_feeds    = True    
+    remove_empty_feeds    = True
    masthead_url          = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
    extra_css             = """ h1{font-family: Georgia,serif}
-                                h2{color: #626262}    
+                                h2{color: #626262}
-                                body{font-family: Arial,sans-serif} 
+                                body{font-family: Arial,sans-serif}
                                img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
-                                .notaFecha{color: #808080}                                
+                                .notaFecha{color: #808080}
-                                .notaEpigrafe{font-size: x-small} 
+                                .notaEpigrafe{font-size: x-small}
-                                .topNota h1{font-family: Arial,sans-serif} 
+                                .topNota h1{font-family: Arial,sans-serif}
                            """
@ -45,7 +45,7 @@ class Lanacion(BasicNewsRecipe):
                    ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
                    ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
                  ]
-    remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})                
+    remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
    remove_attributes = ['height','width','visible','onclick','data-count','name']
    feeds          = [
--- a/resources/recipes/lanacion_chile.recipe
+++ b/resources/recipes/lanacion_chile.recipe
@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
            del item['style']
        return soup
-    language = 'es'
+    language = 'es_CL'
--- a/resources/recipes/laprensa.recipe
+++ b/resources/recipes/laprensa.recipe
@ -21,9 +21,9 @@ class LaPrensa(BasicNewsRecipe):
    encoding              = 'cp1252'
   # cover_url             = 'http://www.laprensa.com.ar/imgs/logo.gif'
    remove_javascript     = True
-    language = 'es'
+    language = 'es_AR'
    lang = 'es'
-    
+
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
@ -32,7 +32,7 @@ class LaPrensa(BasicNewsRecipe):
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    filter_regexps = [r'.*archive.aspx.*']
-   
+
    remove_tags  = [
                    dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
                    dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
@ -58,9 +58,9 @@ class LaPrensa(BasicNewsRecipe):
                    dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
                    dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
                    dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
-                    dict(name='img', height ="0")                   
+                    dict(name='img', height ="0")
                    ]
-                            
+
    extra_css = '''
                    .seccion{font-size:xx-small;}
                    body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@ -69,7 +69,7 @@ class LaPrensa(BasicNewsRecipe):
                    .fecha{font-size:xx-small;}
                    .volanta{font-size:xx-small;}
                '''
-    
+
    feeds = [
              (u'Politica'    , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
             ,(u'Economia'    , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
@ -80,14 +80,14 @@ class LaPrensa(BasicNewsRecipe):
             ,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
            ]
-    
+
    def preprocess_html(self, soup):
-        
+
        for t in soup.findAll(['table','td','tr','span','tbody']):
            t.name = 'div'
        for t in soup.findAll(['hr']):
            t.extract()
-        
+
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe):
        for item in soup.findAll(align = "center"):
                del item['align']
        for item in soup.findAll(bgcolor="ffffff"):
-            del item['bgcolor']               
+            del item['bgcolor']
        return soup
-    
+
-   
+
-    
+
--- a/resources/recipes/laprensa_hn.recipe
+++ b/resources/recipes/laprensa_hn.recipe
@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'
    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/laprensa_ni.recipe
+++ b/resources/recipes/laprensa_ni.recipe
@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
-    language = 'es'
+    language = 'es_NI'
    months_es             = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
    current_month         = months_es[datetime.date.today().month - 1]
--- a/resources/recipes/latimes.recipe
+++ b/resources/recipes/latimes.recipe
@ -1,73 +1,92 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-latimes.com
+www.latimes.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class LATimes(BasicNewsRecipe):
-    title                 = u'The Los Angeles Times'
+    title                 = 'Los Angeles Times'
-    __author__            = u'Darko Miletic and Sujata Raman'
+    __author__            = 'Darko Miletic'
-    description           = u'News from Los Angeles'
+    description           = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
-    oldest_article        = 7
+    publisher             = 'Tribune Company'
-    max_articles_per_feed = 100
+    category              = 'news, politics, USA, Los Angeles, world'
-    language              = 'en'
+    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    encoding              = 'utf-8'
+    language              = 'en'
-    lang                  = 'en-US'
+    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.latimes.com/images/logo.png'
    cover_url             = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
    extra_css             = """
                               body{font-family: Georgia,"Times New Roman",Times,serif }
                               img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
                               h2{font-size: 1.1em}
                               .deckhead{font-size: small; text-transform: uppercase}
                               .small{color: gray; font-size: small}
                               .date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
                            """
    conversion_options = {
-          'comment'          : description
+                          'comment'          : description
-        , 'language'         : lang
+                        , 'tags'             : category
-    }
+                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : 'Yes'
                        }
-    extra_css = '''
+    keep_only_tags = [
-                h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
+                        dict(name='div', attrs={'class':'story'})
-                h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
+                       ,dict(attrs={'class':['entry-header','time','entry-content']})
-                .story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
+                     ]
-                .entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
+    remove_tags_after=dict(name='p', attrs={'class':'copyright'})
-                .entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
+    remove_tags = [
-                .credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
+                     dict(name=['meta','link','iframe','object','embed'])
-                .small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
+                    ,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
-                .byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
+                    ,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
-                .date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
+                  ]
-                .time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
+    remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
                .copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
                .subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
                '''
   # recursions = 1
   # match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
    keep_only_tags    = [dict(name='div', attrs={'class':["story"  ,"entry"] })]
-    remove_tags      = [   dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
+    feeds = [
-                            dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
+              (u'Top News'             , u'http://feeds.latimes.com/latimes/news'                           )
-                            dict(name='p', attrs={'class':["entry-footer",]}),
+             ,(u'Local News'           , u'http://feeds.latimes.com/latimes/news/local'                     )
-                           dict(name='ul', attrs={'class':"article-nav clearfix"}),
+             ,(u'National'             , u'http://feeds.latimes.com/latimes/news/nationworld/nation'        )
-                            dict(name=['iframe'])
+             ,(u'National Politics'    , u'http://feeds.latimes.com/latimes/news/politics/'                 )
-                        ]
+             ,(u'Business'             , u'http://feeds.latimes.com/latimes/business'                       )
-
+             ,(u'Education'            , u'http://feeds.latimes.com/latimes/news/education'                 )
-
+             ,(u'Environment'          , u'http://feeds.latimes.com/latimes/news/science/environment'       )
-    feeds          = [(u'News', u'http://feeds.latimes.com/latimes/news')
+             ,(u'Religion'             , u'http://feeds.latimes.com/latimes/features/religion'              )
-                      ,(u'Local','http://feeds.latimes.com/latimes/news/local')
+             ,(u'Science'              , u'http://feeds.latimes.com/latimes/news/science'                   )
-                      ,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
+             ,(u'Technology'           , u'http://feeds.latimes.com/latimes/technology'                     )
-                      ,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
+             ,(u'Africa'               , u'http://feeds.latimes.com/latimes/africa'                         )
-                      ,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
+             ,(u'Asia'                 , u'http://feeds.latimes.com/latimes/asia'                           )
-                      ,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
+             ,(u'Europe'               , u'http://feeds.latimes.com/latimes/europe'                         )
-                      ,('Politics','http://feeds.latimes.com/latimes/news/politics/')
+             ,(u'Latin America'        , u'http://feeds.latimes.com/latimes/latinamerica'                   )
-                      ,('Business','http://feeds.latimes.com/latimes/business')
+             ,(u'Middle East'          , u'http://feeds.latimes.com/latimes/middleeast'                     )
-                      ,('Sports','http://feeds.latimes.com/latimes/sports/')
+             ,(u'Arts&Culture'         , u'http://feeds.feedburner.com/latimes/entertainment/news/arts'     )
-                      ,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
+             ,(u'Entertainment News'   , u'http://feeds.feedburner.com/latimes/entertainment/news/'         )
-                      ]
+             ,(u'Movie News'           , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/'  )
-
+             ,(u'Movie Reviews'        , u'http://feeds.feedburner.com/movies/reviews/'                     )
             ,(u'Music News'           , u'http://feeds.feedburner.com/latimes/entertainment/news/music/'   )
             ,(u'Pop Album Reviews'    , u'http://feeds.feedburner.com/latimes/pop-album-reviews'           )
             ,(u'Restaurant Reviews'   , u'http://feeds.feedburner.com/latimes/restaurant/reviews'          )
             ,(u'Theatar and Dance'    , u'http://feeds.feedburner.com/latimes/theaterdance'                )
             ,(u'Autos'                , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
             ,(u'Books'                , u'http://feeds.latimes.com/features/books'                         )
             ,(u'Food'                 , u'http://feeds.latimes.com/latimes/features/food/'                 )
             ,(u'Health'               , u'http://feeds.latimes.com/latimes/features/health/'               )
             ,(u'Real Estate'          , u'http://feeds.latimes.com/latimes/classified/realestate/'         )
             ,(u'Commentary'           , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/'   )
             ,(u'Sports'               , u'http://feeds.latimes.com/latimes/sports/'                        )
            ]
    def get_article_url(self, article):
-        ans = article.get('feedburner_origlink').rpartition('?')[0]
+        ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
        try:
            self.log('Looking for full story link in', ans)
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
            pass
        return ans
-
+    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
               item['alt'] = 'image'
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
               str = item.string
               item.replaceWith(str)
            else:
               if limg:
                  item.name  ='div'
                  item.attrs =[]
               else:
                   str = self.tag_to_string(item)
                   item.replaceWith(str)
        return soup
--- a/resources/recipes/latribuna.recipe
+++ b/resources/recipes/latribuna.recipe
@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'
    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/los_tiempos_bo.recipe
+++ b/resources/recipes/los_tiempos_bo.recipe
@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_BO'
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
--- a/resources/recipes/milenio.recipe
+++ b/resources/recipes/milenio.recipe
@ -12,7 +12,7 @@ import datetime
 class Milenio(BasicNewsRecipe):
    title                 = u'Milenio-diario'
    __author__            = 'Bmsleight'
-    language              = 'es'
+    language              = 'es_MX'
    description           = 'Milenio-diario'
    oldest_article        = 10
    max_articles_per_feed = 100
--- a/resources/recipes/miradasalsur.recipe
+++ b/resources/recipes/miradasalsur.recipe
@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/montevideo_com.recipe
+++ b/resources/recipes/montevideo_com.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Montevideo COMM'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/newsweek_argentina.recipe
+++ b/resources/recipes/newsweek_argentina.recipe
@ -20,7 +20,7 @@ class Newsweek_Argentina(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/observa_digital.recipe
+++ b/resources/recipes/observa_digital.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Observa Digital'
    __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
    description           = 'Noticias desde Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@ -19,15 +19,15 @@ class Pagina12(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css             = """ 
+    extra_css             = """
-                               body{font-family: Arial,Helvetica,sans-serif } 
+                               body{font-family: Arial,Helvetica,sans-serif }
                               img{margin-bottom: 0.4em; display:block}
-                               #autor{font-weight: bold} 
+                               #autor{font-weight: bold}
-                               #fecha,#epigrafe{font-size: 0.9em; margin: 5px} 
+                               #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
                               #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
                               .fgprincipal{font-size: large; font-weight: bold}
                            """
@ -83,7 +83,7 @@ class Pagina12(BasicNewsRecipe):
            del it['href']
            del it['title']
        for item in soup.findAll('p'):
-            it = item.find('h3')            
+            it = item.find('h3')
            if it:
               it.name='span'
-        return soup
+        return soup
--- a/resources/recipes/perfil.recipe
+++ b/resources/recipes/perfil.recipe
@ -17,7 +17,7 @@ class Perfil(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
    extra_css             = """
--- a/resources/recipes/reptantes.recipe
+++ b/resources/recipes/reptantes.recipe
@ -13,7 +13,7 @@ class Reptantes(BasicNewsRecipe):
    description           = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
    oldest_article        = 130
    max_articles_per_feed = 100
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'utf-8'
    no_stylesheets        = True
    use_embedded_content  = False
--- a/resources/recipes/revista_bla.recipe
+++ b/resources/recipes/revista_bla.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Revista Bla'
    __author__            = 'Gustavo Azambuja'
    description           = 'Moda | Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/veintitres.recipe
+++ b/resources/recipes/veintitres.recipe
@ -20,7 +20,7 @@ class Veintitres(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'
    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -360,6 +360,9 @@ class LinuxFreeze(Command):
            def main():
                try:
                    sys.argv[0] = sys.calibre_basename
                    dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
                    if dfv and os.path.exists(dfv):
                        sys.path.insert(0, os.path.abspath(dfv))
                    set_default_encoding()
                    set_helper()
                    set_qt_plugin_path()
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -139,6 +139,13 @@ class CHMReader(CHMFile):
        if self.hhc_path not in files and files:
            self.hhc_path = files[0]
        if self.hhc_path == '.hhc' and self.hhc_path not in files:
            from calibre import walk
            for x in walk(output_dir):
                if os.path.basename(x).lower() in ('index.htm', 'index.html'):
                    self.hhc_path = os.path.relpath(x, output_dir)
                    break
    def _reformat(self, data, htmlpath):
        try:
            data = xml_to_unicode(data, strip_encoding_pats=True)[0]
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -53,7 +53,7 @@ def find_pages(dir, sort_on_mtime=False, verbose=False):
        prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
    return pages
-class PageProcessor(list):
+class PageProcessor(list): # {{{
    '''
    Contains the actual image rendering logic. See :method:`render` and
    :method:`process_pages`.
@ -111,6 +111,13 @@ class PageProcessor(list):
            SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
            try:
                if self.opts.comic_image_size:
                    SCRWIDTH, SCRHEIGHT = map(int, [x.strip() for x in
                        self.opts.comic_image_size.split('x')])
            except:
                pass # Ignore
            if self.opts.keep_aspect_ratio:
                # Preserve the aspect ratio by adding border
                aspect = float(sizex) / float(sizey)
@ -170,6 +177,7 @@ class PageProcessor(list):
                dest = dest[:-1]
                os.rename(dest+'8', dest)
            self.append(dest)
 # }}}
 def render_pages(tasks, dest, opts, notification=lambda x, y: x):
    '''
@ -291,7 +299,11 @@ class ComicInput(InputFormatPlugin):
        OptionRecommendation(name='no_process', recommended_value=False,
              help=_("Apply no processing to the image")),
        OptionRecommendation(name='dont_grayscale', recommended_value=False,
-            help=_('Do not convert the image to grayscale (black and white)'))
+            help=_('Do not convert the image to grayscale (black and white)')),
        OptionRecommendation(name='comic_image_size', recommended_value=None,
            help=_('Specify the image size as widthxheight pixels. Normally,'
                ' an image size is automatically calculated from the output '
                'profile, this option overrides it.')),
        ])
    recommendations = set([
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -24,10 +24,11 @@ class HeuristicProcessor(object):
        self.chapters_no_title = 0
        self.chapters_with_title = 0
        self.blanks_deleted = False
        self.blanks_between_paragraphs = False
        self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
-        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
+        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
-        self.softbreak = re.compile(r'\s*(?P<openline><p(?=\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
+        self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
-        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
+        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)
    def is_pdftohtml(self, src):
        '''
        Return True when the pdftohtml marker comment occurs within the
        first 1000 characters of ``src``, False otherwise.
        '''
        marker = '<!-- created by calibre\'s pdftohtml -->'
        # Only the head of the document is inspected, never the whole text.
        head = src[:1000]
        return marker in head
@ -42,8 +43,10 @@ class HeuristicProcessor(object):
                    " chapters. - " + unicode(chap))
            return '<h2>'+chap+'</h2>\n'
        else:
-            txt_chap = html2text(chap)
+            delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
-            txt_title = html2text(title)
+            delete_quotes = re.compile('\'\"')
            txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
            txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
            self.html_preprocess_sections = self.html_preprocess_sections + 1
            self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                    " chapters & titles. - " + unicode(chap) + ", " + unicode(title))
@ -375,9 +378,9 @@ class HeuristicProcessor(object):
        html = re.sub('<p\s?/>', '', html)
        # Get rid of empty span, bold, font, em, & italics tags
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
        self.deleted_nbsps = True
        return html
@ -416,6 +419,28 @@ class HeuristicProcessor(object):
                return True
        return False
    def detect_blank_formatting(self, html):
        blanks_before_headings = re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE)
        blanks_after_headings = re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE)
        def markup_spacers(match):
           blanks = match.group(0)
           blanks = self.blankreg.sub('\n<p class="spacer"> </p>', blanks)
           return blanks
        html = blanks_before_headings.sub(markup_spacers, html)
        html = blanks_after_headings.sub(markup_spacers, html)
        if self.html_preprocess_sections > self.min_chapters:
            html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html)
        return html
    def detect_soft_breaks(self, html):
        if not self.blanks_deleted and self.blanks_between_paragraphs:
            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
        else:
            html = self.blankreg.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
        return html
    def __call__(self, html):
        self.log.debug("*********  Heuristic processing HTML  *********")
@ -457,23 +482,23 @@ class HeuristicProcessor(object):
        #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
        # Determine whether the document uses interleaved blank lines
-        blanks_between_paragraphs = self.analyze_blanks(html)
+        self.blanks_between_paragraphs = self.analyze_blanks(html)
        #self.dump(html, 'before_chapter_markup')
        # detect chapters/sections to match xpath or splitting logic
        if getattr(self.extra_opts, 'markup_chapter_headings', False):
-            html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
+            html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
        if getattr(self.extra_opts, 'italicize_common_cases', False):
            html = self.markup_italicis(html)
        # If more than 40% of the lines are empty paragraphs and the user has enabled delete
        # blank paragraphs then delete blank lines to clean up spacing
-        if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
+        if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
            self.log.debug("deleting blank lines")
            self.blanks_deleted = True
-            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
+            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
            html = self.blankreg.sub('', html)
        # Determine line ending type
@ -525,14 +550,13 @@ class HeuristicProcessor(object):
            html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
        if getattr(self.extra_opts, 'format_scene_breaks', False):
            html = self.detect_blank_formatting(html)
            html = self.detect_soft_breaks(html)
            # Center separator lines
-            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
+            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
-            if not self.blanks_deleted:
+            #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
                html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
            html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
        if self.deleted_nbsps:
            # put back non-breaking spaces in empty paragraphs to preserve original formatting
-            html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
+            html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
            html = self.softbreak.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
        return html
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if ref is None:
                x.getparent().remove(x)
                continue
            for y in opf.itermanifest():
                if y.get('id', None) == ref and y.get('media-type', None) in \
                    ('application/vnd.adobe-page-template+xml',):
                        p = x.getparent()
                        if p is not None:
                            p.remove(x)
                        break
        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
                os.mkdir(debug_dir)
                debug_dir = 'rtfdebug'
                run_lev = 4
                self.log('Running RTFParser in debug mode')
            except:
                pass
        parser = ParseRtf(
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
        with open('styles.css', 'ab') as f:
            f.write(css)
    # def preprocess(self, fname):
        # self.log('\tPreprocessing to convert unicode characters')
        # try:
            # data = open(fname, 'rb').read()
            # from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
            # tokenizer = RtfTokenizer(data)
            # tokens = RtfTokenParser(tokenizer.tokens)
            # data = tokens.toRTF()
            # fname = 'preprocessed.rtf'
            # with open(fname, 'wb') as f:
                # f.write(data)
        # except:
            # self.log.exception(
            # 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
        # return fname
    def convert_borders(self, doc):
        border_styles = []
        style_map = {}
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        #Name of the preprocesssed RTF file
        # fname = self.preprocess(stream.name)
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException, e:
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
        opf.render(open('metadata.opf', 'wb'))
        return os.path.abspath('metadata.opf')
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -238,6 +238,8 @@ class ParseRtf:
                    bug_handler = RtfInvalidCodeException,
                        )
            enc = 'cp' + encode_obj.get_codepage()
            if enc == 'cp10000':
                enc = 'mac_roman'
            msg = 'Exception in token processing'
            if check_encoding_obj.check_encoding(self.__file, enc):
                file_name = self.__file if isinstance(self.__file, str) \
--- a/src/calibre/ebooks/rtf2xml/colors.py
+++ b/src/calibre/ebooks/rtf2xml/colors.py
@ -15,8 +15,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import sys, os, tempfile,  re
+import sys, os, tempfile, re
 from calibre.ebooks.rtf2xml import copy
 class Colors:
    """
    Change lines with color info from color numbers to the actual color names.
@ -40,8 +42,10 @@ class Colors:
        self.__file = in_file
        self.__copy = copy
        self.__bug_handler = bug_handler
        self.__line = 0
        self.__write_to = tempfile.mktemp()
        self.__run_level = run_level
    def __initiate_values(self):
        """
        Initiate all values.
@ -61,6 +65,7 @@ class Colors:
        self.__color_num = 1
        self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
        # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
    def __before_color_func(self, line):
        """
        Requires:
@ -76,6 +81,7 @@ class Colors:
        if self.__token_info == 'mi<mk<clrtbl-beg':
            self.__state = 'in_color_table'
        self.__write_obj.write(line)
    def __default_color_func(self, line):
        """
        Requires:
@ -87,6 +93,7 @@ class Colors:
            """
        hex_num = line[-3:-1]
        self.__color_string += hex_num
    def __blue_func(self, line):
        """
        Requires:
@ -109,6 +116,7 @@ class Colors:
        )
        self.__color_num += 1
        self.__color_string = '#'
    def __in_color_func(self, line):
        """
        Requires:
@ -127,12 +135,13 @@ class Colors:
            self.__state = 'after_color_table'
        else:
            action = self.__state_dict.get(self.__token_info)
-            if action == None:
+            if action is None:
                sys.stderr.write('in module colors.py\n'
                'function is self.__in_color_func\n'
                'no action for %s' % self.__token_info
                )
            action(line)
    def __after_color_func(self, line):
        """
        Check the to see if it contains color info. If it does, extract the
@ -180,6 +189,7 @@ class Colors:
        else:
            self.__write_obj.write(line)
        # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
    def __sub_from_line_color(self, match_obj):
        num = match_obj.group(1)
        try:
@ -191,25 +201,27 @@ class Colors:
            else:
                return 'bdr-color_:no-value'
        hex_num = self.__figure_num(num)
-        return_value = 'bdr-color_:%s' % hex_num
+        return 'bdr-color_:%s' % hex_num
-        return return_value
+
    def __figure_num(self, num):
        if num == 0:
            hex_num = 'false'
        else:
            hex_num = self.__color_dict.get(num)
-        if hex_num == None:
+        if hex_num is None:
            if self.__run_level > 3:
                msg = 'no value in self.__color_dict for key %s\n' % num
                raise self.__bug_hanlder, msg
        if hex_num == None:
            hex_num = '0'
            if self.__run_level > 5:
                msg = 'no value in self.__color_dict' \
                'for key %s at line %d\n' % (num, self.__line)
                raise self.__bug_handler, msg
        return hex_num
    def __do_nothing_func(self, line):
        """
        Bad RTF will have text in the color table
        """
        pass
    def convert_colors(self):
        """
        Requires:
@ -226,20 +238,16 @@ class Colors:
            info, and substitute the number with the hex number.
        """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
+        with open(self.__file, 'r') as read_obj:
-        self.__write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as self.__write_obj:
-        line_to_read = 1
+                for line in read_obj:
-        while line_to_read:
+                    self.__line+=1
-            line_to_read = read_obj.readline()
+                    self.__token_info = line[:16]
-            line = line_to_read
+                    action = self.__state_dict.get(self.__state)
-            self.__token_info = line[:16]
+                    if action is None:
-            action = self.__state_dict.get(self.__state)
+                        sys.stderr.write('no matching state in module fonts.py\n')
-            if action == None:
+                        sys.stderr.write(self.__state + '\n')
-                sys.stderr.write('no no matching state in module fonts.py\n')
+                    action(line)
                sys.stderr.write(self.__state + '\n')
            action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "color.data")
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@ -33,13 +33,13 @@ class ConvertToTags:
        self.__copy = copy
        self.__dtd_path = dtd_path
        self.__no_dtd = no_dtd
-        if encoding != 'mac_roman':
+        self.__encoding = 'cp' + encoding
-            self.__encoding = 'cp' + encoding
+        if encoding == 'mac_roman':
        else:
            self.__encoding = 'mac_roman'
        self.__indent = indent
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
        self.__convert_utf = False
    def __initiate_values(self):
        """
@ -213,7 +213,8 @@ class ConvertToTags:
        if not check_encoding_obj.check_encoding(self.__file, verbose=False):
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
-            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
+            self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
            self.__convert_utf = True
        else:
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
            sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@ -253,15 +254,28 @@ class ConvertToTags:
            an empty tag function.
            """
        self.__initiate_values()
-        self.__write_obj = open(self.__write_to, 'w')
+        with open(self.__write_to, 'w') as self.__write_obj:
-        self.__write_dec()
+            self.__write_dec()
-        with open(self.__file, 'r') as read_obj:
+            with open(self.__file, 'r') as read_obj:
-            for line in read_obj:
+                for line in read_obj:
-                self.__token_info = line[:16]
+                    self.__token_info = line[:16]
-                action = self.__state_dict.get(self.__token_info)
+                    action = self.__state_dict.get(self.__token_info)
-                if action is not None:
+                    if action is not None:
-                    action(line)
+                        action(line)
        self.__write_obj.close()
        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
        if self.__convert_utf:
            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
            copy_obj.rename(self.__write_to, self.__file)
            with open(self.__file, 'r') as read_obj:
                with open(self.__write_to, 'w') as write_obj:
                    file = read_obj.read()
                    try:
                        file = file.decode(self.__encoding)
                        write_obj.write(file.encode('utf-8'))
                    except:
                        sys.stderr.write('Conversion to UTF-8 is not possible,'
                        ' encoding should be very carefully checked')
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@ -75,12 +75,16 @@ class DefaultEncoding:
            self._encoding()
            self.__datafetched = True
            code_page = 'ansicpg' + self.__code_page
            if self.__code_page == '10000':
                self.__code_page = 'mac_roman'
        return self.__platform, code_page, self.__default_num
    def get_codepage(self):
        if not self.__datafetched:
            self._encoding()
            self.__datafetched = True
            if self.__code_page == '10000':
                self.__code_page = 'mac_roman'
        return self.__code_page
    def get_platform(self):
--- a/src/calibre/ebooks/rtf2xml/fonts.py
+++ b/src/calibre/ebooks/rtf2xml/fonts.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
 from calibre.ebooks.rtf2xml import copy
 class Fonts:
    """
    Change lines with font info from font numbers to the actual font names.
@ -45,6 +47,7 @@ class Fonts:
        self.__default_font_num = default_font_num
        self.__write_to = tempfile.mktemp()
        self.__run_level = run_level
    def __initiate_values(self):
        """
        Initiate all values.
@ -67,6 +70,7 @@ class Fonts:
        self.__font_table = {}
        # individual font written
        self.__wrote_ind_font = 0
    def __default_func(self, line):
        """
        Requires:
@ -79,6 +83,7 @@ class Fonts:
        if self.__token_info == 'mi<mk<fonttb-beg':
            self.__state = 'font_table'
        self.__write_obj.write(line)
    def __font_table_func(self, line):
        """
        Requires:
@ -101,6 +106,7 @@ class Fonts:
            self.__font_num = self.__default_font_num
            self.__text_line = ''
        ##self.__write_obj.write(line)
    def __font_in_table_func(self, line):
        """
        Requires:
@ -138,6 +144,7 @@ class Fonts:
        elif self.__token_info == 'mi<mk<fonttb-end':
            self.__found_end_font_table_func()
            self.__state = 'after_font_table'
    def __found_end_font_table_func(self):
        """
        Required:
@ -150,7 +157,8 @@ class Fonts:
        if not self.__wrote_ind_font:
            self.__write_obj.write(
            'mi<tg<empty-att_'
-            '<font-in-table<name>Times<num>0\n' )
+            '<font-in-table<name>Times<num>0\n')
    def __after_font_table_func(self, line):
        """
        Required:
@ -169,7 +177,7 @@ class Fonts:
        if self.__token_info == 'cw<ci<font-style':
            font_num = line[20:-1]
            font_name = self.__font_table.get(font_num)
-            if font_name == None:
+            if font_name is None:
                if self.__run_level > 3:
                    msg = 'no value for %s in self.__font_table\n' % font_num
                    raise self.__bug_handler, msg
@ -182,6 +190,7 @@ class Fonts:
                )
        else:
            self.__write_obj.write(line)
    def convert_fonts(self):
        """
        Required:
@ -197,20 +206,15 @@ class Fonts:
            info. Substitute a font name for a font number.
            """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
+        with open(self.__file, 'r') as read_obj:
-        self.__write_obj = open(self.__write_to, 'w')
+            with open(self.__write_to, 'w') as self.__write_obj:
-        line_to_read = 1
+                for line in read_obj:
-        while line_to_read:
+                    self.__token_info = line[:16]
-            line_to_read = read_obj.readline()
+                    action = self.__state_dict.get(self.__state)
-            line = line_to_read
+                    if action is None:
-            self.__token_info = line[:16]
+                        sys.stderr.write('no matching state in module fonts.py\n' \
-            action = self.__state_dict.get(self.__state)
+                                            + self.__state + '\n')
-            if action == None:
+                    action(line)
                sys.stderr.write('no no matching state in module fonts.py\n')
                sys.stderr.write(self.__state + '\n')
            action(line)
        read_obj.close()
        self.__write_obj.close()
        default_font_name = self.__font_table.get(self.__default_font_num)
        if not default_font_name:
            default_font_name = 'Not Defined'
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@ -43,7 +43,7 @@ class GetCharMap:
    def get_char_map(self, map):
        if map == 'ansicpg0':
            map = 'ansicpg1250'
-        if map in ('ansicpg10000', '10000'):
+        if map == 'ansicpg10000':
            map = 'mac_roman'
        found_map = False
        map_dict = {}
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@ -126,12 +126,6 @@ class Tokenize:
        tokens = re.split(self.__splitexp, input_file)
        #remove empty tokens and \n
        return filter(lambda x: len(x) > 0 and x != '\n', tokens)
        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
        # this is for older RTF
        #line = re.sub(self.__par_exp, '\\par ', line)
        #return filter(lambda x: len(x) > 0, \
            #(self.__remove_line.sub('', x) for x in tokens)) 
    def __compile_expressions(self):
        SIMPLE_RPL = {
@ -160,7 +154,7 @@ class Tokenize:
            }
        self.__replace_spchar = MReplace(SIMPLE_RPL)
        #add ;? in case of char following \u
-        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
        self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
        #manage upr/ud situations
@ -172,14 +166,21 @@ class Tokenize:
        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
        #this is for old RTF
        self.__par_exp = re.compile(r'\\\n+')
-        # self.__par_exp = re.compile(r'\\$')
+        #handle cw using a digit as argument and without space as delimiter
        self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
        #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
        #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
        #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
        #self.__remove_line = re.compile(r'\n+')
        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
    def __correct_spliting(self, token):
        match_obj = re.search(self.__cwdigit_exp, token)
        if match_obj is None:
            return token
        else:
            return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
    def tokenize(self):
        """Main class for handling other methods. Reads the file \
        , uses method self.sub_reg to make basic substitutions,\
@ -187,7 +188,7 @@ class Tokenize:
        #read
        with open(self.__file, 'r') as read_obj:
            input_file = read_obj.read()
-        
+
        #process simple replacements and split giving us a correct list
        #remove '' and \n in the process
        tokens = self.__sub_reg_split(input_file)
@ -195,7 +196,9 @@ class Tokenize:
        tokens = map(self.__unicode_process, tokens)
        #remove empty items created by removing \uc
        tokens = filter(lambda x: len(x) > 0, tokens)
-        
+        #handles bothersome cases
        tokens = map(self.__correct_spliting, tokens)
        #write
        with open(self.__write_to, 'wb') as write_obj:
            write_obj.write('\n'.join(tokens))
@ -203,11 +206,9 @@ class Tokenize:
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "tokenize.data")
        # if self.__out_file:
            # self.__file = self.__out_file
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
-        
+
        #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
 # import sys
@ -223,4 +224,4 @@ class Tokenize:
 # if __name__ == '__main__':
-    # sys.exit(main())
+    # sys.exit(main())
--- a/src/calibre/gui2/convert/comic_input.py
+++ b/src/calibre/gui2/convert/comic_input.py
@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
                ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
                    'despeckle', 'no_sort', 'no_process', 'landscape',
                    'dont_sharpen', 'disable_trim', 'wide', 'output_format',
-                    'dont_grayscale']
+                    'dont_grayscale', 'comic_image_size']
                )
        self.db, self.book_id = db, book_id
        for x in get_option('output_format').option.choices:
--- a/src/calibre/gui2/convert/comic_input.ui
+++ b/src/calibre/gui2/convert/comic_input.ui
@ -7,7 +7,7 @@
    <x>0</x>
    <y>0</y>
    <width>599</width>
-    <height>345</height>
+    <height>398</height>
   </rect>
  </property>
  <property name="windowTitle">
@ -37,70 +37,70 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="0">
+   <item row="4" column="0">
    <widget class="QCheckBox" name="opt_dont_normalize">
     <property name="text">
      <string>Disable &amp;normalize</string>
     </property>
    </widget>
   </item>
-   <item row="4" column="0">
+   <item row="5" column="0">
    <widget class="QCheckBox" name="opt_keep_aspect_ratio">
     <property name="text">
      <string>Keep &amp;aspect ratio</string>
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
+   <item row="6" column="0">
    <widget class="QCheckBox" name="opt_dont_sharpen">
     <property name="text">
      <string>Disable &amp;Sharpening</string>
     </property>
    </widget>
   </item>
-   <item row="6" column="0">
+   <item row="7" column="0">
    <widget class="QCheckBox" name="opt_disable_trim">
     <property name="text">
      <string>Disable &amp;Trimming</string>
     </property>
    </widget>
   </item>
-   <item row="7" column="0">
+   <item row="8" column="0">
    <widget class="QCheckBox" name="opt_wide">
     <property name="text">
      <string>&amp;Wide</string>
     </property>
    </widget>
   </item>
-   <item row="8" column="0">
+   <item row="9" column="0">
    <widget class="QCheckBox" name="opt_landscape">
     <property name="text">
      <string>&amp;Landscape</string>
     </property>
    </widget>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <widget class="QCheckBox" name="opt_right2left">
     <property name="text">
      <string>&amp;Right to left</string>
     </property>
    </widget>
   </item>
-   <item row="10" column="0">
+   <item row="11" column="0">
    <widget class="QCheckBox" name="opt_no_sort">
     <property name="text">
      <string>Don't so&amp;rt</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0">
+   <item row="12" column="0">
    <widget class="QCheckBox" name="opt_despeckle">
     <property name="text">
      <string>De&amp;speckle</string>
     </property>
    </widget>
   </item>
-   <item row="13" column="0">
+   <item row="14" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
     </property>
    </widget>
   </item>
-   <item row="12" column="0">
+   <item row="13" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>&amp;Output format:</string>
@ -130,7 +130,7 @@
     </property>
    </widget>
   </item>
-   <item row="12" column="1">
+   <item row="13" column="1">
    <widget class="QComboBox" name="opt_output_format"/>
   </item>
   <item row="1" column="0">
@ -140,6 +140,19 @@
     </property>
    </widget>
   </item>
   <item row="3" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>Override image  &amp;size:</string>
     </property>
     <property name="buddy">
      <cstring>opt_comic_image_size</cstring>
     </property>
    </widget>
   </item>
   <item row="3" column="1">
    <widget class="QLineEdit" name="opt_comic_image_size"/>
   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -838,9 +838,9 @@ class DeviceMixin(object): # {{{
                            format_count[f] = 1
            for f in self.device_manager.device.settings().format_map:
                if f in format_count.keys():
-                    formats.append((f, _('%i of %i Books' % (format_count[f], len(rows))), True if f in aval_out_formats else False))
+                    formats.append((f, _('%i of %i Books') % (format_count[f], len(rows))), True if f in aval_out_formats else False)
                elif f in aval_out_formats:
-                    formats.append((f, _('0 of %i Books' % len(rows)), True))
+                    formats.append((f, _('0 of %i Books') % len(rows)), True)
            d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
            if d.exec_() != QDialog.Accepted:
                return
--- a/src/calibre/gui2/dialogs/check_library.py
+++ b/src/calibre/gui2/dialogs/check_library.py
@ -7,7 +7,7 @@ import os, shutil
 from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
            QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
-            QLineEdit, Qt, QProgressBar, QSize, QTimer
+            QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.library.check_library import CheckLibrary, CHECKS
@ -16,7 +16,7 @@ from calibre import prints, as_unicode
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.library.sqlite import DBThread, OperationalError
-class DBCheck(QDialog):
+class DBCheck(QDialog): # {{{
    def __init__(self, parent, db):
        QDialog.__init__(self, parent)
@ -134,7 +134,7 @@ class DBCheck(QDialog):
    def reject(self):
        """Mark this dialog as rejected, then run QDialog's own reject()."""
        # The flag is set before delegating to QDialog.reject.
        self.rejected = True
        QDialog.reject(self)
-
+# }}}
 class Item(QTreeWidgetItem):
    pass
@ -146,9 +146,70 @@ class CheckLibraryDialog(QDialog):
        self.db = db
        self.setWindowTitle(_('Check Library -- Problems Found'))
        self.setWindowIcon(QIcon(I('debug.png')))
-        self._layout = QVBoxLayout(self)
+        self._tl = QHBoxLayout()
-        self.setLayout(self._layout)
+        self._layout = QVBoxLayout()
        self.setLayout(self._tl)
        self._tl.addLayout(self._layout)
        self.helpw = QTextEdit(self)
        self._tl.addWidget(self.helpw)
        self.helpw.setReadOnly(True)
        self.helpw.setText(_('''\
        <h1>Help</h1>
        <p>calibre stores the list of your books and their metadata in a
        database. The actual book files and covers are stored as normal
        files in the calibre library folder. The database contains a list of the files
        and covers belonging to each book entry. This tool checks that the
        actual files in the library folder on your computer match the
        information in the database.</p>
        <p>The result of each type of check is shown to the left. The various
        checks are:
        </p>
        <ul>
        <li><b>Invalid titles</b>: These are files and folders appearing
        in the library where books titles should, but that do not have the
        correct form to be a book title.</li>
        <li><b>Extra titles</b>: These are extra files in your calibre
        library that appear to be correctly-formed titles, but have no corresponding
        entries in the database</li>
        <li><b>Invalid authors</b>: These are files appearing
        in the library where only author folders should be.</li>
        <li><b>Extra authors</b>: These are folders in the
        calibre library that appear to be authors but that do not have entries
        in the database</li>
        <li><b>Missing book formats</b>: These are book formats that are in
        the database but have no corresponding format file in the book's folder.
        <li><b>Extra book formats</b>: These are book format files found in
        the book's folder but not in the database.
        <li><b>Unknown files in books</b>: These are extra files in the
        folder of each book that do not correspond to a known format or cover
        file.</li>
        <li><b>Missing cover files</b>: These represent books that are marked
        in the database as having covers but the actual cover files are
        missing.</li>
        <li><b>Cover files not in database</b>: These are books that have
        cover files but are marked as not having covers in the database.</li>
        <li><b>Folder raising exception</b>: These represent folders in the
        calibre library that could not be processed/understood by this
        tool.</li>
        </ul>
        <p>There are two kinds of automatic fixes possible: <i>Delete
        marked</i> and <i>Fix marked</i>.</p>
        <p><i>Delete marked</i> is used to remove extra files/folders/covers that
        have no entries in the database. Check the box next to the item you want
        to delete. Use with caution.</p>
        <p><i>Fix marked</i> is applicable only to covers (the two lines marked
        'fixable'). In the case of missing cover files, checking the fixable
        box and pushing this button will remove the cover mark from the
        database for all the files in that category. In the case of extra
        cover files, checking the fixable box and pushing this button will
        add the cover mark to the database for all the files in that
        category.</p>
        '''))
        self.log = QTreeWidget(self)
        self.log.itemChanged.connect(self.item_changed)
@ -199,7 +260,7 @@ class CheckLibraryDialog(QDialog):
        self._layout.addLayout(h)
        self._layout.addWidget(self.bbox)
-        self.resize(750, 500)
+        self.resize(950, 500)
        self.bbox.setEnabled(True)
    def do_exec(self):
@ -347,5 +408,6 @@ class CheckLibraryDialog(QDialog):
 if __name__ == '__main__':
    app = QApplication([])
-    d = CheckLibraryDialog()
+    from calibre.library import db
    d = CheckLibraryDialog(None, db())
    d.exec_()
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@ -266,7 +266,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
    def add_plugin(self):
        path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
-                filters=[(_('Plugins'), ['zip'])], all_files=False,
+                filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
                    select_only_single_file=True)
        if not path:
            return
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
                help = _('The fields to output when cataloging books in the '
                    'database.  Should be a comma-separated list of fields.\n'
                    'Available fields: %s.\n'
                    'plus user-created custom fields.\n'
                    'Example: %s=title,authors,tags\n'
                    "Default: '%%default'\n"
                    "Applies to: BIBTEX output format")%(', '.join(FIELDS),
@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
                dest = 'bib_cit',
                action = None,
                help = _('The template for citation creation from database fields.\n'
-                    ' Should be a template with {} enclosed fields.\n'
+                    'Should be a template with {} enclosed fields.\n'
                    'Available fields: %s.\n'
                    "Default: '%%default'\n"
                    "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
                if field == 'authors' :
                    bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
-                elif field in ['title', 'publisher', 'cover', 'uuid',
+                elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                        'author_sort', 'series'] :
                    bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
                    if calibre_files:
                        files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
                            for format in item]
-                        bibtex_entry.append(u'files = "%s"' % u', '.join(files))
+                        bibtex_entry.append(u'file = "%s"' % u', '.join(files))
                elif field == 'series_index' :
                    bibtex_entry.append(u'volume = "%s"' % int(item))
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
        if opts.verbose:
            opts_dict = vars(opts)
            log("%s(): Generating %s" % (self.name,self.fmt))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])
@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
            as outfile:
            #File header
            nb_entries = len(data)
            #check in book strict if all is ok else throw a warning into log
            if bib_entry == 'book' :
                nb_books = len(filter(check_entry_book_valid, data))
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
                    log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
                    nb_entries = nb_books
            # If connected device, add 'On Device' values to data
            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
                for entry in data:
                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
            outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
            outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@ -112,6 +112,16 @@ _extra_lang_codes = {
        'en_IE' : _('English (Ireland)'),
        'en_CN' : _('English (China)'),
        'es_PY' : _('Spanish (Paraguay)'),
        'es_UY' : _('Spanish (Uruguay)'),
        'es_AR' : _('Spanish (Argentina)'),
        'es_MX' : _('Spanish (Mexico)'),
        'es_CU' : _('Spanish (Cuba)'),
        'es_CL' : _('Spanish (Chile)'),
        'es_EC' : _('Spanish (Ecuador)'),
        'es_HN' : _('Spanish (Honduras)'),
        'es_VE' : _('Spanish (Venezuela)'),
        'es_BO' : _('Spanish (Bolivia)'),
        'es_NI' : _('Spanish (Nicaragua)'),
        'de_AT' : _('German (AT)'),
        'fr_BE' : _('French (BE)'),
        'nl'    : _('Dutch (NL)'),
`@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):`
	`feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]`	`feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]`


	`language = 'es'`	`language = 'es_CL'`