Sync to trunk.

2025-08-30 23:00:21 -04:00 · 2011-01-31 18:56:29 -05:00 · 2011-01-31 18:56:29 -05:00 · 54e7ba109d
commit 54e7ba109d
parent 76837bbd7e 5849b45d11
75 changed files with 973 additions and 556 deletions
--- a/resources/recipes/180.recipe
+++ b/resources/recipes/180.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = '180.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/7dias.recipe
+++ b/resources/recipes/7dias.recipe
@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/ambito.recipe
+++ b/resources/recipes/ambito.recipe
@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
            del item['style']
        return soup

-    language = 'es'
+    language = 'es_AR'
--- a/resources/recipes/animal_politico.recipe
+++ b/resources/recipes/animal_politico.recipe
@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
    masthead_url   = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
    oldest_article = 1
    max_articles_per_feed = 100
-    language       = 'es'
+    language       = 'es_MX'

    #feeds          = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]

--- a/resources/recipes/axxon_magazine.recipe
+++ b/resources/recipes/axxon_magazine.recipe
@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'utf-8'
    publication_type      = 'magazine'
    INDEX                 = 'http://axxon.com.ar/rev/'
--- a/resources/recipes/axxon_news.recipe
+++ b/resources/recipes/axxon_news.recipe
@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'

--- a/resources/recipes/bitacora.recipe
+++ b/resources/recipes/bitacora.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'bitacora.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/buenosaireseconomico.recipe
+++ b/resources/recipes/buenosaireseconomico.recipe
@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'utf8'
-    language              = 'es'
+    language              = 'es_AR'
    publication_type      = 'newspaper'
    INDEX                 = 'http://www.clarin.com'
    masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
--- a/resources/recipes/criticadigital.recipe
+++ b/resources/recipes/criticadigital.recipe
@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
    description           = 'Noticias de Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
-    language = 'es'
+    language = 'es_AR'

    no_stylesheets        = True
    use_embedded_content  = False
--- a/resources/recipes/cubadebate.recipe
+++ b/resources/recipes/cubadebate.recipe
@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Contra el Terorismo Mediatico'
    oldest_article        = 15
-    language              = 'es'
+    language              = 'es_CU'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
@ -20,8 +20,8 @@ class CubaDebate(BasicNewsRecipe):
    encoding              = 'utf-8'
    masthead_url          = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
    publication_type      = 'newsportal'
-    extra_css             = """ 
-                               #BlogTitle{font-size: xx-large; font-weight: bold} 
+    extra_css             = """
+                               #BlogTitle{font-size: xx-large; font-weight: bold}
                               body{font-family: Verdana, Arial, Tahoma, sans-serif}
                            """

@ -41,7 +41,7 @@ class CubaDebate(BasicNewsRecipe):

    feeds          = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
    remove_attributes=['width','height','lang']
-    
+
    def print_version(self, url):
        return url + 'print/'

@ -50,5 +50,5 @@ class CubaDebate(BasicNewsRecipe):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
-               item['alt'] = 'image'                
+               item['alt'] = 'image'
        return soup
--- a/resources/recipes/deutsche_welle_es.recipe
+++ b/resources/recipes/deutsche_welle_es.recipe
@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
-    language              = 'es'
+    language              = 'de_ES'
    publication_type      = 'newsportal'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
--- a/resources/recipes/diagonales.recipe
+++ b/resources/recipes/diagonales.recipe
@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/el_mercurio_chile.recipe
+++ b/resources/recipes/el_mercurio_chile.recipe
@ -20,8 +20,8 @@ class ElMercurio(BasicNewsRecipe):
    masthead_url          = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
    remove_javascript     = True
    use_embedded_content  = False
-    language              = 'es'
-    
+    language              = 'es_CL'
+

    conversion_options = {
                          'comment'   : description
@ -33,7 +33,7 @@ class ElMercurio(BasicNewsRecipe):
    keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})]
    remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})]
    remove_attributes = ['height','width']
-    
+
    feeds = [
               (u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0')
              ,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1')
--- a/resources/recipes/el_observador.recipe
+++ b/resources/recipes/el_observador.recipe
@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
    title                 = 'Observa Digital'
    __author__            = 'yrvn'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/el_pais_uy.recipe
+++ b/resources/recipes/el_pais_uy.recipe
@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
    description           = 'Noticias de Uruguay y el resto del mundo'
    publisher             = 'EL PAIS S.A.'
    category              = 'news, politics, Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 2
--- a/resources/recipes/el_universal.recipe
+++ b/resources/recipes/el_universal.recipe
@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
    remove_javascript     = True
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
-    language              = 'es'
+    language              = 'es_MX'

    extra_css = '''
                    body{font-family:Arial,Helvetica,sans-serif}
--- a/resources/recipes/elargentino.recipe
+++ b/resources/recipes/elargentino.recipe
@ -12,7 +12,7 @@ class ElArgentino(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Informacion Libre las 24 horas'
    publisher             = 'ElArgentino.com'
-    category              = 'news, politics, Argentina'    
+    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    remove_javascript     = True
@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'utf8'
    cover_url             = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
-    language = 'es'
+    language = 'es_AR'


    html2lrf_options = [
@ -28,16 +28,16 @@ class ElArgentino(BasicNewsRecipe):
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    remove_tags = [
                     dict(name='div', attrs={'id':'noprint'              })
                    ,dict(name='div', attrs={'class':'encabezadoImprimir'})
                    ,dict(name='a'  , attrs={'target':'_blank'           })
                  ]
-    
-    feeds = [ 
+
+    feeds = [
              (u'Portada'     , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home'                                             )
             ,(u'Pais'        , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs'        )
             ,(u'Economia'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa'    )
@ -51,12 +51,12 @@ class ElArgentino(BasicNewsRecipe):

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
-        article_id, rsep, rrest = article_part.partition('-')    
+        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
-            del item['style']        
+            del item['style']
        return soup
--- a/resources/recipes/elcomercio.recipe
+++ b/resources/recipes/elcomercio.recipe
@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = True
-    language              = 'es'
+    language              = 'es_EC'
    masthead_url          = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css             = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

--- a/resources/recipes/elcronista.recipe
+++ b/resources/recipes/elcronista.recipe
@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina'
    oldest_article        = 2
-    language = 'es'
+    language = 'es_AR'

    max_articles_per_feed = 100
    no_stylesheets        = True
@ -25,14 +25,14 @@ class ElCronista(BasicNewsRecipe):
                        , '--category'      , 'news, Argentina'
                        , '--publisher'     , title
                        ]
-    
+
    keep_only_tags = [
                        dict(name='table', attrs={'width':'100%'             })
                       ,dict(name='h1'   , attrs={'class':'Arialgris16normal'})
                     ]

    remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
-                     
+
    feeds = [
               (u'Economia'                , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml'             )
              ,(u'Negocios'                , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml'             )
@ -69,4 +69,4 @@ class ElCronista(BasicNewsRecipe):
        if link_item:
           cover_url = index + link_item.img['src']
        return cover_url
-        
+
--- a/resources/recipes/eltiempo_hn.recipe
+++ b/resources/recipes/eltiempo_hn.recipe
@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'

    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/eluniversal_ve.recipe
+++ b/resources/recipes/eluniversal_ve.recipe
@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
    encoding               = 'cp1252'
    publisher              = 'El Universal'
    category               = 'news, Caracas, Venezuela, world'
-    language               = 'es'
+    language               = 'es_VE'
    cover_url              = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')

    conversion_options = {
--- a/resources/recipes/eluniversalimpresa.recipe
+++ b/resources/recipes/eluniversalimpresa.recipe
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class ElUniversalImpresaRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
-    language = 'es'
+    language = 'es_MX'
    version = 1

    title = u'El Universal (Edici\u00F3n Impresa)'
--- a/resources/recipes/eluniverso_ec.recipe
+++ b/resources/recipes/eluniverso_ec.recipe
@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_EC'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'
--- a/resources/recipes/explosm.recipe
+++ b/resources/recipes/explosm.recipe
@ -0,0 +1,54 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Explosm(BasicNewsRecipe):
+    '''Recipe for the Explosm feed; keeps only the comic image of each article.'''
+    title              = u'Explosm Rotated'
+    __author__        = 'Andromeda Rabbit'
+    description      = 'Explosm'
+    language            = 'en'
+    use_embedded_content = False
+    no_stylesheets    = True
+    oldest_article    = 24
+    remove_javascript   = True
+    remove_empty_feeds  = True
+    max_articles_per_feed = 10
+
+    feeds = [
+             (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
+             ]
+
+    #match_regexps = [r'http://www.explosm.net/comics/.*']
+
+    keep_only_tags   = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
+    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
+
+    def get_cover_url(self):
+        '''Return the fixed URL of the cover image.'''
+        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
+
+    def parse_feeds(self):
+        '''Return the parsed feeds with every non-comic article removed.'''
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            # Slice-assign so the feed's own article list is filtered in place.
+            curfeed.articles[:] = [
+                article for article in curfeed.articles
+                if re.search(r'http://www.explosm.net/comics', article.url) is not None
+            ]
+        return feeds
+
+    def skip_ad_pages(self, soup):
+        '''Return soup when the comic image is missing from it, else None.'''
+        # Skip ad pages served before actual article
+        # NOTE(review): returns the parsed soup, not markup -- confirm calibre accepts it.
+        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
+        if skip_tag is None:
+            return soup
+        return None
--- a/resources/recipes/freeway.recipe
+++ b/resources/recipes/freeway.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'freeway.com.uy'
    __author__            = 'Gustavo Azambuja'
    description           = 'Revista Freeway, Montevideo, Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 1
--- a/resources/recipes/granma.recipe
+++ b/resources/recipes/granma.recipe
@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    cover_url             = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
-    language = 'es'
+    language = 'es_CU'

    remove_javascript     = True

--- a/resources/recipes/ieco.recipe
+++ b/resources/recipes/ieco.recipe
@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
    encoding              = 'utf-8'
    publisher             = 'Grupo Clarin'
    category              = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
-    language              = 'es'
+    language              = 'es_AR'
    cover_url             = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
    extra_css             = ' #bd{font-family: sans-serif} '

--- a/resources/recipes/infobae.recipe
+++ b/resources/recipes/infobae.recipe
@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'cp1252'
    masthead_url          = 'http://www.infobae.com/imgs/header/header.gif'
    remove_javascript     = True
@ -25,7 +25,7 @@ class Infobae(BasicNewsRecipe):
                              body{font-family:Arial,Helvetica,sans-serif;}
                              .popUpTitulo{color:#0D4261; font-size: xx-large}
                            '''
-    
+
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
@ -33,7 +33,7 @@ class Infobae(BasicNewsRecipe):
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }
-    
+

    feeds = [
              (u'Noticias'  , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml'       )
--- a/resources/recipes/juventudrebelde.recipe
+++ b/resources/recipes/juventudrebelde.recipe
@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
-    language = 'es'
+    language = 'es_CU'

    cover_url             = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
    remove_javascript     = True
--- a/resources/recipes/la_cuarta.recipe
+++ b/resources/recipes/la_cuarta.recipe
@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
    feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]


-    language = 'es'
+    language = 'es_CL'
--- a/resources/recipes/la_diaria.recipe
+++ b/resources/recipes/la_diaria.recipe
@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
    title                 = 'La Diaria'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/la_jornada.recipe
+++ b/resources/recipes/la_jornada.recipe
@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_MX'
    remove_empty_feeds    = True
    cover_url             = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
    masthead_url          = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
@ -34,8 +34,8 @@ class LaJornada_mx(BasicNewsRecipe):
                                .credito{font-weight: bold; margin-left: 1em}
                                .credito-autor{font-variant: small-caps; font-weight: bold }
                                .credito-titulo{text-align: right}
-                                .hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em } 
-                                .loc{font-weight: bold} 
+                                .hemero{text-align: right; font-size: 0.9em; margin-bottom: 0.5em }
+                                .loc{font-weight: bold}
                                .carton{text-align: center}
                                .credit{font-weight: bold}
                                .sumario{font-weight: bold; text-align: center}
@ -56,7 +56,7 @@ class LaJornada_mx(BasicNewsRecipe):
                                       ,re.DOTALL|re.IGNORECASE)
                                       ,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
                         ]
-                        
+
    keep_only_tags = [
                         dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','credito-articulo','text','carton']})
                        ,dict(name='div', attrs={'id':'renderComments'})
@ -88,4 +88,4 @@ class LaJornada_mx(BasicNewsRecipe):
    def get_article_url(self, article):
        rurl = article.get('link',  None)
        return rurl.rpartition('&partner=')[0]
-        
+
--- a/resources/recipes/la_razon_bo.recipe
+++ b/resources/recipes/la_razon_bo.recipe
@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_BO'
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
--- a/resources/recipes/la_segunda.recipe
+++ b/resources/recipes/la_segunda.recipe
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaSegunda(BasicNewsRecipe):
    title                 = 'La Segunda'
    __author__            = 'Darko Miletic'
-    description           = 'El sitio de noticias online de Chile' 
+    description           = 'El sitio de noticias online de Chile'
    publisher             = 'La Segunda'
    category              = 'news, politics, Chile'
    oldest_article        = 2
@ -19,9 +19,9 @@ class LaSegunda(BasicNewsRecipe):
    encoding              = 'cp1252'
    masthead_url          = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_empty_feeds    = True
-    language              = 'es'
-    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '            
-    
+    language              = 'es_CL'
+    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
+
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
@ -29,13 +29,13 @@ class LaSegunda(BasicNewsRecipe):
                        , 'language'         : language
 						, 'linearize_tables' : True
                        }
-                        
+
    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
    remove_tags        = [dict(name='img')]
    remove_attributes  = ['width','height']
-	
-                        
-    feeds = [ 
+
+
+    feeds = [
               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
              ,(u'Politica'               , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
              ,(u'Cronica'                , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
@ -49,6 +49,6 @@ class LaSegunda(BasicNewsRecipe):
            ]

    def print_version(self, url):
-        rest, sep, article_id = url.partition('index.asp?idnoticia=')        
+        rest, sep, article_id = url.partition('index.asp?idnoticia=')
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
-    
+
--- a/resources/recipes/lamujerdemivida.recipe
+++ b/resources/recipes/lamujerdemivida.recipe
@ -11,15 +11,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class LaMujerDeMiVida(BasicNewsRecipe):
    title                 = 'La Mujer de mi Vida'
    __author__            = 'Darko Miletic'
-    description           = 'Cultura de otra manera'    
+    description           = 'Cultura de otra manera'
    oldest_article        = 90
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    publisher             = 'La Mujer de mi Vida'
-    category              = 'literatura, critica, arte, ensayos'    
-    language = 'es'
+    category              = 'literatura, critica, arte, ensayos'
+    language = 'es_AR'

    INDEX                 = 'http://www.lamujerdemivida.com.ar/'
    html2lrf_options = [
@ -28,8 +28,8 @@ class LaMujerDeMiVida(BasicNewsRecipe):
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    keep_only_tags = [dict(name='table', attrs={'width':'570'})]

@ -51,7 +51,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
        if cover_item:
           cover_url = self.INDEX + cover_item['src']
        return cover_url
-    
+
    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
@ -74,4 +74,4 @@ class LaMujerDeMiVida(BasicNewsRecipe):
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
-                
+
--- a/resources/recipes/lanacion.recipe
+++ b/resources/recipes/lanacion.recipe
@ -16,17 +16,17 @@ class Lanacion(BasicNewsRecipe):
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
-    language              = 'es'
+    language              = 'es_AR'
    publication_type      = 'newspaper'
-    remove_empty_feeds    = True    
+    remove_empty_feeds    = True
    masthead_url          = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
    extra_css             = """ h1{font-family: Georgia,serif}
-                                h2{color: #626262}    
-                                body{font-family: Arial,sans-serif} 
+                                h2{color: #626262}
+                                body{font-family: Arial,sans-serif}
                                img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
-                                .notaFecha{color: #808080}                                
-                                .notaEpigrafe{font-size: x-small} 
-                                .topNota h1{font-family: Arial,sans-serif} 
+                                .notaFecha{color: #808080}
+                                .notaEpigrafe{font-size: x-small}
+                                .topNota h1{font-family: Arial,sans-serif}
                            """


@ -45,7 +45,7 @@ class Lanacion(BasicNewsRecipe):
                    ,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
                    ,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
                  ]
-    remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})                
+    remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
    remove_attributes = ['height','width','visible','onclick','data-count','name']

    feeds          = [
--- a/resources/recipes/lanacion_chile.recipe
+++ b/resources/recipes/lanacion_chile.recipe
@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
            del item['style']
        return soup

-    language = 'es'
+    language = 'es_CL'
--- a/resources/recipes/laprensa.recipe
+++ b/resources/recipes/laprensa.recipe
@ -21,9 +21,9 @@ class LaPrensa(BasicNewsRecipe):
    encoding              = 'cp1252'
   # cover_url             = 'http://www.laprensa.com.ar/imgs/logo.gif'
    remove_javascript     = True
-    language = 'es'
+    language = 'es_AR'
    lang = 'es'
-    
+
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
@ -32,7 +32,7 @@ class LaPrensa(BasicNewsRecipe):

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    filter_regexps = [r'.*archive.aspx.*']
-   
+
    remove_tags  = [
                    dict(name='td', attrs={'class':["link-registro","link-buscador"]}),
                    dict(name='td', attrs={'id':["TDTabItem1","TDTabItem2","TDTabItem3","TDTabItem4"]}),
@ -58,9 +58,9 @@ class LaPrensa(BasicNewsRecipe):
                    dict(name='img', src = "/versions/1/imgs/separador-linea-azul.gif"),
                    dict(name='img', src = " /versions/1/imgs/separador-linea.gif"),
                    dict(name='a',text ="Powered by Civinext Groupware - V. 2.0.3567.23706"),
-                    dict(name='img', height ="0")                   
+                    dict(name='img', height ="0")
                    ]
-                            
+
    extra_css = '''
                    .seccion{font-size:xx-small;}
                    body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@ -69,7 +69,7 @@ class LaPrensa(BasicNewsRecipe):
                    .fecha{font-size:xx-small;}
                    .volanta{font-size:xx-small;}
                '''
-    
+
    feeds = [
              (u'Politica'    , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=4' )
             ,(u'Economia'    , u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx&Rss=5' )
@ -80,14 +80,14 @@ class LaPrensa(BasicNewsRecipe):
             ,(u'Espectaculos', u'http://www.laprensa.com.ar/ResourcesManager.aspx?Resource=Rss.aspx?Rss=10')
            ]

-    
+
    def preprocess_html(self, soup):
-        
+
        for t in soup.findAll(['table','td','tr','span','tbody']):
            t.name = 'div'
        for t in soup.findAll(['hr']):
            t.extract()
-        
+
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
@ -95,8 +95,8 @@ class LaPrensa(BasicNewsRecipe):
        for item in soup.findAll(align = "center"):
                del item['align']
        for item in soup.findAll(bgcolor="ffffff"):
-            del item['bgcolor']               
+            del item['bgcolor']
        return soup
-    
-   
-    
+
+
+
--- a/resources/recipes/laprensa_hn.recipe
+++ b/resources/recipes/laprensa_hn.recipe
@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'

    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/laprensa_ni.recipe
+++ b/resources/recipes/laprensa_ni.recipe
@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
-    language = 'es'
+    language = 'es_NI'

    months_es             = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
    current_month         = months_es[datetime.date.today().month - 1]
--- a/resources/recipes/latimes.recipe
+++ b/resources/recipes/latimes.recipe
@ -1,73 +1,92 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-latimes.com
+www.latimes.com
 '''
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class LATimes(BasicNewsRecipe):
-    title                 = u'The Los Angeles Times'
-    __author__            = u'Darko Miletic and Sujata Raman'
-    description           = u'News from Los Angeles'
-    oldest_article        = 7
-    max_articles_per_feed = 100
-    language              = 'en'
+    title                 = 'Los Angeles Times'
+    __author__            = 'Darko Miletic'
+    description           = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
+    publisher             = 'Tribune Company'
+    category              = 'news, politics, USA, Los Angeles, world'
+    oldest_article        = 2
+    max_articles_per_feed = 200
    no_stylesheets        = True
+    encoding              = 'utf8'
    use_embedded_content  = False
-    encoding              = 'utf-8'
-    lang                  = 'en-US'
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.latimes.com/images/logo.png'
+    cover_url             = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
+    extra_css             = """
+                               body{font-family: Georgia,"Times New Roman",Times,serif }
+                               img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
+                               h2{font-size: 1.1em}
+                               .deckhead{font-size: small; text-transform: uppercase}
+                               .small{color: gray; font-size: small}
+                               .date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
+                            """

    conversion_options = {
-          'comment'          : description
-        , 'language'         : lang
-    }
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : 'Yes'
+                        }

-    extra_css = '''
-                h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
-                h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-                .story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-                .entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-                .entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-                .credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-                .small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-                .byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-                .date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-                .time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-                .copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
-                .subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-                '''
-
-   # recursions = 1
-   # match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
-
-    keep_only_tags    = [dict(name='div', attrs={'class':["story"  ,"entry"] })]
+    keep_only_tags = [
+                        dict(name='div', attrs={'class':'story'})
+                       ,dict(attrs={'class':['entry-header','time','entry-content']})
+                     ]
+    remove_tags_after=dict(name='p', attrs={'class':'copyright'})
+    remove_tags = [
+                     dict(name=['meta','link','iframe','object','embed'])
+                    ,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
+                    ,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
+                  ]
+    remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']


-    remove_tags      = [   dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
-                            dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
-                            dict(name='p', attrs={'class':["entry-footer",]}),
-                           dict(name='ul', attrs={'class':"article-nav clearfix"}),
-                            dict(name=['iframe'])
-                        ]
-
-
-    feeds          = [(u'News', u'http://feeds.latimes.com/latimes/news')
-                      ,(u'Local','http://feeds.latimes.com/latimes/news/local')
-                      ,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
-                      ,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
-                      ,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
-                      ,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
-                      ,('Politics','http://feeds.latimes.com/latimes/news/politics/')
-                      ,('Business','http://feeds.latimes.com/latimes/business')
-                      ,('Sports','http://feeds.latimes.com/latimes/sports/')
-                      ,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
-                      ]
-
+    # (feed title, feed URL) pairs
+    feeds = [
+              (u'Top News'             , u'http://feeds.latimes.com/latimes/news'                           )
+             ,(u'Local News'           , u'http://feeds.latimes.com/latimes/news/local'                     )
+             ,(u'National'             , u'http://feeds.latimes.com/latimes/news/nationworld/nation'        )
+             ,(u'National Politics'    , u'http://feeds.latimes.com/latimes/news/politics/'                 )
+             ,(u'Business'             , u'http://feeds.latimes.com/latimes/business'                       )
+             ,(u'Education'            , u'http://feeds.latimes.com/latimes/news/education'                 )
+             ,(u'Environment'          , u'http://feeds.latimes.com/latimes/news/science/environment'       )
+             ,(u'Religion'             , u'http://feeds.latimes.com/latimes/features/religion'              )
+             ,(u'Science'              , u'http://feeds.latimes.com/latimes/news/science'                   )
+             ,(u'Technology'           , u'http://feeds.latimes.com/latimes/technology'                     )
+             ,(u'Africa'               , u'http://feeds.latimes.com/latimes/africa'                         )
+             ,(u'Asia'                 , u'http://feeds.latimes.com/latimes/asia'                           )
+             ,(u'Europe'               , u'http://feeds.latimes.com/latimes/europe'                         )
+             ,(u'Latin America'        , u'http://feeds.latimes.com/latimes/latinamerica'                   )
+             ,(u'Middle East'          , u'http://feeds.latimes.com/latimes/middleeast'                     )
+             ,(u'Arts&Culture'         , u'http://feeds.feedburner.com/latimes/entertainment/news/arts'     )
+             ,(u'Entertainment News'   , u'http://feeds.feedburner.com/latimes/entertainment/news/'         )
+             ,(u'Movie News'           , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/'  )
+             ,(u'Movie Reviews'        , u'http://feeds.feedburner.com/movies/reviews/'                     )
+             ,(u'Music News'           , u'http://feeds.feedburner.com/latimes/entertainment/news/music/'   )
+             ,(u'Pop Album Reviews'    , u'http://feeds.feedburner.com/latimes/pop-album-reviews'           )
+             ,(u'Restaurant Reviews'   , u'http://feeds.feedburner.com/latimes/restaurant/reviews'          )
+             ,(u'Theater and Dance'    , u'http://feeds.feedburner.com/latimes/theaterdance'                )
+             ,(u'Autos'                , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
+             ,(u'Books'                , u'http://feeds.latimes.com/features/books'                         )
+             ,(u'Food'                 , u'http://feeds.latimes.com/latimes/features/food/'                 )
+             ,(u'Health'               , u'http://feeds.latimes.com/latimes/features/health/'               )
+             ,(u'Real Estate'          , u'http://feeds.latimes.com/latimes/classified/realestate/'         )
+             ,(u'Commentary'           , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/'   )
+             ,(u'Sports'               , u'http://feeds.latimes.com/latimes/sports/'                        )
+            ]

    def get_article_url(self, article):
-        ans = article.get('feedburner_origlink').rpartition('?')[0]
+        ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]

        try:
            self.log('Looking for full story link in', ans)
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
            pass
        return ans

-
+    def preprocess_html(self, soup):
+        '''
+        Simplify the article markup held in ``soup``.
+
+        Every ``style`` attribute is deleted and each ``img`` without an
+        ``alt`` attribute gets ``alt="image"``. Each ``a`` element is then
+        unwrapped: if its ``string`` is not None it is replaced by that
+        string; otherwise, if it contains an ``img``, it is renamed to
+        ``div`` with its attributes cleared; any other link is replaced by
+        the result of ``self.tag_to_string``.
+
+        :param soup: parsed document (presumably a BeautifulSoup tree, going
+                     by the ``findAll``/``has_key``/``replaceWith`` calls)
+        :return: the same ``soup`` object, modified in place
+        '''
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                # The link exposes a string: keep only that text
+                item.replaceWith(item.string)
+            elif limg:
+                # Link wrapping an image: keep the content, drop the anchor
+                item.name = 'div'
+                item.attrs = []
+            else:
+                # No string and no image: flatten the link to plain text
+                item.replaceWith(self.tag_to_string(item))
+        return soup
--- a/resources/recipes/latribuna.recipe
+++ b/resources/recipes/latribuna.recipe
@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_HN'

    lang                  = 'es-HN'
    direction             = 'ltr'
--- a/resources/recipes/los_tiempos_bo.recipe
+++ b/resources/recipes/los_tiempos_bo.recipe
@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_BO'
    publication_type      = 'newspaper'
    delay                 = 1
    remove_empty_feeds    = True
--- a/resources/recipes/milenio.recipe
+++ b/resources/recipes/milenio.recipe
@ -12,7 +12,7 @@ import datetime
 class Milenio(BasicNewsRecipe):
    title                 = u'Milenio-diario'
    __author__            = 'Bmsleight'
-    language              = 'es'
+    language              = 'es_MX'
    description           = 'Milenio-diario'
    oldest_article        = 10
    max_articles_per_feed = 100
--- a/resources/recipes/miradasalsur.recipe
+++ b/resources/recipes/miradasalsur.recipe
@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/montevideo_com.recipe
+++ b/resources/recipes/montevideo_com.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Montevideo COMM'
    __author__            = 'Gustavo Azambuja'
    description           = 'Noticias de Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/newsweek_argentina.recipe
+++ b/resources/recipes/newsweek_argentina.recipe
@ -20,7 +20,7 @@ class Newsweek_Argentina(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/resources/recipes/observa_digital.recipe
+++ b/resources/recipes/observa_digital.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Observa Digital'
    __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
    description           = 'Noticias desde Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@ -19,15 +19,15 @@ class Pagina12(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    remove_empty_feeds    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css             = """ 
-                               body{font-family: Arial,Helvetica,sans-serif } 
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
                               img{margin-bottom: 0.4em; display:block}
-                               #autor{font-weight: bold} 
-                               #fecha,#epigrafe{font-size: 0.9em; margin: 5px} 
+                               #autor{font-weight: bold}
+                               #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
                               #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
                               .fgprincipal{font-size: large; font-weight: bold}
                            """
@ -83,7 +83,7 @@ class Pagina12(BasicNewsRecipe):
            del it['href']
            del it['title']
        for item in soup.findAll('p'):
-            it = item.find('h3')            
+            it = item.find('h3')
            if it:
               it.name='span'
-        return soup
+        return soup
--- a/resources/recipes/perfil.recipe
+++ b/resources/recipes/perfil.recipe
@ -17,7 +17,7 @@ class Perfil(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
-    language              = 'es'
+    language              = 'es_AR'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
    extra_css             = """
--- a/resources/recipes/reptantes.recipe
+++ b/resources/recipes/reptantes.recipe
@ -13,7 +13,7 @@ class Reptantes(BasicNewsRecipe):
    description           = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
    oldest_article        = 130
    max_articles_per_feed = 100
-    language              = 'es'
+    language              = 'es_AR'
    encoding              = 'utf-8'
    no_stylesheets        = True
    use_embedded_content  = False
--- a/resources/recipes/revista_bla.recipe
+++ b/resources/recipes/revista_bla.recipe
@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
    title                 = 'Revista Bla'
    __author__            = 'Gustavo Azambuja'
    description           = 'Moda | Uruguay'
-    language       = 'es'
+    language       = 'es_UY'
    timefmt        = '[%a, %d %b, %Y]'
    use_embedded_content  = False
    recursion             = 5
--- a/resources/recipes/veintitres.recipe
+++ b/resources/recipes/veintitres.recipe
@ -20,7 +20,7 @@ class Veintitres(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
-    language = 'es'
+    language = 'es_AR'

    lang                  = 'es-AR'
    direction             = 'ltr'
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -360,6 +360,9 @@ class LinuxFreeze(Command):
            def main():
                try:
                    sys.argv[0] = sys.calibre_basename
+                    dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
+                    if dfv and os.path.exists(dfv):
+                        sys.path.insert(0, os.path.abspath(dfv))
                    set_default_encoding()
                    set_helper()
                    set_qt_plugin_path()
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -139,6 +139,13 @@ class CHMReader(CHMFile):
        if self.hhc_path not in files and files:
            self.hhc_path = files[0]

+        if self.hhc_path == '.hhc' and self.hhc_path not in files:
+            from calibre import walk
+            for x in walk(output_dir):
+                if os.path.basename(x).lower() in ('index.htm', 'index.html'):
+                    self.hhc_path = os.path.relpath(x, output_dir)
+                    break
+
    def _reformat(self, data, htmlpath):
        try:
            data = xml_to_unicode(data, strip_encoding_pats=True)[0]
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -53,7 +53,7 @@ def find_pages(dir, sort_on_mtime=False, verbose=False):
        prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
    return pages

-class PageProcessor(list):
+class PageProcessor(list): # {{{
    '''
    Contains the actual image rendering logic. See :method:`render` and
    :method:`process_pages`.
@ -111,6 +111,13 @@ class PageProcessor(list):

            SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size

+            try:
+                if self.opts.comic_image_size:
+                    SCRWIDTH, SCRHEIGHT = map(int, [x.strip() for x in
+                        self.opts.comic_image_size.split('x')])
+            except:
+                pass # Ignore
+
            if self.opts.keep_aspect_ratio:
                # Preserve the aspect ratio by adding border
                aspect = float(sizex) / float(sizey)
@ -170,6 +177,7 @@ class PageProcessor(list):
                dest = dest[:-1]
                os.rename(dest+'8', dest)
            self.append(dest)
+# }}}

 def render_pages(tasks, dest, opts, notification=lambda x, y: x):
    '''
@ -291,7 +299,11 @@ class ComicInput(InputFormatPlugin):
        OptionRecommendation(name='no_process', recommended_value=False,
              help=_("Apply no processing to the image")),
        OptionRecommendation(name='dont_grayscale', recommended_value=False,
-            help=_('Do not convert the image to grayscale (black and white)'))
+            help=_('Do not convert the image to grayscale (black and white)')),
+        OptionRecommendation(name='comic_image_size', recommended_value=None,
+            help=_('Specify the image size as widthxheight pixels. Normally,'
+                ' an image size is automatically calculated from the output '
+                'profile, this option overrides it.')),
        ])

    recommendations = set([
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -24,10 +24,11 @@ class HeuristicProcessor(object):
        self.chapters_no_title = 0
        self.chapters_with_title = 0
        self.blanks_deleted = False
+        self.blanks_between_paragraphs = False
        self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
-        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
-        self.softbreak = re.compile(r'\s*(?P<openline><p(?=\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
-        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
+        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"(softbreak|spacer)\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
+        self.anyblank = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
+        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}(?!\s*<h\d)', re.IGNORECASE)

    def is_pdftohtml(self, src):
        '''
        Return True when the pdftohtml marker comment occurs within the
        first 1000 characters of ``src``, False otherwise.
        '''
        marker = '<!-- created by calibre\'s pdftohtml -->'
        head = src[:1000]
        return marker in head
@ -42,8 +43,10 @@ class HeuristicProcessor(object):
                    " chapters. - " + unicode(chap))
            return '<h2>'+chap+'</h2>\n'
        else:
-            txt_chap = html2text(chap)
-            txt_title = html2text(title)
+            delete_whitespace = re.compile('^\s*(?P<c>.*?)\s*$')
+            delete_quotes = re.compile('\'\"')
+            txt_chap = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(chap)))
+            txt_title = delete_quotes.sub('', delete_whitespace.sub('\g<c>', html2text(title)))
            self.html_preprocess_sections = self.html_preprocess_sections + 1
            self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                    " chapters & titles. - " + unicode(chap) + ", " + unicode(title))
@ -375,9 +378,9 @@ class HeuristicProcessor(object):
        html = re.sub('<p\s?/>', '', html)
        # Get rid of empty span, bold, font, em, & italics tags
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
        self.deleted_nbsps = True
        return html

@ -416,6 +419,28 @@ class HeuristicProcessor(object):
                return True
        return False

+    def detect_blank_formatting(self, html):
+        '''
+        Re-tag blank paragraphs that sit next to headings as spacers.
+
+        Runs of empty ``<p>`` elements directly before an ``<hN`` tag or
+        directly after a ``</hN>`` tag are passed through ``self.blankreg``,
+        and each paragraph it matches becomes ``<p class="spacer"> </p>``.
+        When ``self.html_preprocess_sections`` exceeds ``self.min_chapters``
+        the same rewrite is applied to everything before the first heading.
+
+        :param html: markup to process
+        :return: the processed markup
+        '''
+        def markup_spacers(match):
+            # Rewrite the blank paragraphs inside the matched span
+            return self.blankreg.sub('\n<p class="spacer"> </p>', match.group(0))
+
+        heading_adjacent = (
+            # blank paragraphs immediately preceding a heading
+            re.compile(r'(\s*<p[^>]*>\s*</p>){1,}(?=\s*<h\d)', re.IGNORECASE),
+            # blank paragraphs immediately following a heading
+            re.compile(r'(?<=</h\d>)(\s*<p[^>]*>\s*</p>){1,}', re.IGNORECASE),
+        )
+        for pattern in heading_adjacent:
+            html = pattern.sub(markup_spacers, html)
+
+        enough_sections = self.html_preprocess_sections > self.min_chapters
+        if enough_sections:
+            html = re.sub('(?si)^.*?(?=<h\d)', markup_spacers, html)
+        return html
+
+    def detect_soft_breaks(self, html):
+        '''
+        Replace blank paragraphs with a styled "softbreak" paragraph.
+
+        ``self.multi_blank`` is used when ``self.blanks_between_paragraphs``
+        is set and ``self.blanks_deleted`` is not; in every other case
+        ``self.blankreg`` is used.
+
+        :param html: markup to process
+        :return: the processed markup
+        '''
+        softbreak = '\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>'
+        use_multi_blank = not self.blanks_deleted and self.blanks_between_paragraphs
+        pattern = self.multi_blank if use_multi_blank else self.blankreg
+        return pattern.sub(softbreak, html)
+
+

    def __call__(self, html):
        self.log.debug("*********  Heuristic processing HTML  *********")
@ -457,23 +482,23 @@ class HeuristicProcessor(object):
        #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)

        # Determine whether the document uses interleaved blank lines
-        blanks_between_paragraphs = self.analyze_blanks(html)
+        self.blanks_between_paragraphs = self.analyze_blanks(html)

        #self.dump(html, 'before_chapter_markup')
        # detect chapters/sections to match xpath or splitting logic

        if getattr(self.extra_opts, 'markup_chapter_headings', False):
-            html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
+            html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)

        if getattr(self.extra_opts, 'italicize_common_cases', False):
            html = self.markup_italicis(html)

        # If more than 40% of the lines are empty paragraphs and the user has enabled delete
        # blank paragraphs then delete blank lines to clean up spacing
-        if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
+        if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
            self.log.debug("deleting blank lines")
            self.blanks_deleted = True
-            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
+            html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid"> </p>', html)
            html = self.blankreg.sub('', html)

        # Determine line ending type
@ -525,14 +550,13 @@ class HeuristicProcessor(object):
            html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)

        if getattr(self.extra_opts, 'format_scene_breaks', False):
+            html = self.detect_blank_formatting(html)
+            html = self.detect_soft_breaks(html)
            # Center separator lines
-            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
-            if not self.blanks_deleted:
-                html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
-            html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
+            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em; page-break-before:avoid">' + '\g<break>' + '</p>', html)
+            #html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)

        if self.deleted_nbsps:
            # put back non-breaking spaces in empty paragraphs to preserve original formatting
-            html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
-            html = self.softbreak.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
+            html = self.anyblank.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
        return html
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')

+        for x in list(opf.iterspine()):
+            ref = x.get('idref', None)
+            if ref is None:
+                x.getparent().remove(x)
+                continue
+            for y in opf.itermanifest():
+                if y.get('id', None) == ref and y.get('media-type', None) in \
+                    ('application/vnd.adobe-page-template+xml',):
+                        p = x.getparent()
+                        if p is not None:
+                            p.remove(x)
+                        break
+
        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
                os.mkdir(debug_dir)
                debug_dir = 'rtfdebug'
                run_lev = 4
+                self.log('Running RTFParser in debug mode')
            except:
                pass
        parser = ParseRtf(
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
        with open('styles.css', 'ab') as f:
            f.write(css)

-    # def preprocess(self, fname):
-        # self.log('\tPreprocessing to convert unicode characters')
-        # try:
-            # data = open(fname, 'rb').read()
-            # from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
-            # tokenizer = RtfTokenizer(data)
-            # tokens = RtfTokenParser(tokenizer.tokens)
-            # data = tokens.toRTF()
-            # fname = 'preprocessed.rtf'
-            # with open(fname, 'wb') as f:
-                # f.write(data)
-        # except:
-            # self.log.exception(
-            # 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
-        # return fname
-
    def convert_borders(self, doc):
        border_styles = []
        style_map = {}
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
-        #Name of the preprocesssed RTF file
-        # fname = self.preprocess(stream.name)
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException, e:
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
        opf.render(open('metadata.opf', 'wb'))
        return os.path.abspath('metadata.opf')

+
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -238,6 +238,8 @@ class ParseRtf:
                    bug_handler = RtfInvalidCodeException,
                        )
            enc = 'cp' + encode_obj.get_codepage()
+            if enc == 'cp10000':
+                enc = 'mac_roman'
            msg = 'Exception in token processing'
            if check_encoding_obj.check_encoding(self.__file, enc):
                file_name = self.__file if isinstance(self.__file, str) \
--- a/src/calibre/ebooks/rtf2xml/colors.py
+++ b/src/calibre/ebooks/rtf2xml/colors.py
@ -15,8 +15,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
-import sys, os, tempfile,  re
+import sys, os, tempfile, re
+
 from calibre.ebooks.rtf2xml import copy
+
 class Colors:
    """
    Change lines with color info from color numbers to the actual color names.
@ -40,8 +42,10 @@ class Colors:
        self.__file = in_file
        self.__copy = copy
        self.__bug_handler = bug_handler
+        self.__line = 0
        self.__write_to = tempfile.mktemp()
        self.__run_level = run_level
+
    def __initiate_values(self):
        """
        Initiate all values.
@ -61,6 +65,7 @@ class Colors:
        self.__color_num = 1
        self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
        # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
+
    def __before_color_func(self, line):
        """
        Requires:
@ -76,6 +81,7 @@ class Colors:
        if self.__token_info == 'mi<mk<clrtbl-beg':
            self.__state = 'in_color_table'
        self.__write_obj.write(line)
+
    def __default_color_func(self, line):
        """
        Requires:
@ -87,6 +93,7 @@ class Colors:
            """
        hex_num = line[-3:-1]
        self.__color_string += hex_num
+
    def __blue_func(self, line):
        """
        Requires:
@ -109,6 +116,7 @@ class Colors:
        )
        self.__color_num += 1
        self.__color_string = '#'
+
    def __in_color_func(self, line):
        """
        Requires:
@ -127,12 +135,13 @@ class Colors:
            self.__state = 'after_color_table'
        else:
            action = self.__state_dict.get(self.__token_info)
-            if action == None:
+            if action is None:
                sys.stderr.write('in module colors.py\n'
                'function is self.__in_color_func\n'
                'no action for %s' % self.__token_info
                )
            action(line)
+
    def __after_color_func(self, line):
        """
        Check the to see if it contains color info. If it does, extract the
@ -180,6 +189,7 @@ class Colors:
        else:
            self.__write_obj.write(line)
        # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
+
    def __sub_from_line_color(self, match_obj):
        num = match_obj.group(1)
        try:
@ -191,25 +201,27 @@ class Colors:
            else:
                return 'bdr-color_:no-value'
        hex_num = self.__figure_num(num)
-        return_value = 'bdr-color_:%s' % hex_num
-        return return_value
+        return 'bdr-color_:%s' % hex_num
+
    def __figure_num(self, num):
        if num == 0:
            hex_num = 'false'
        else:
            hex_num = self.__color_dict.get(num)
-        if hex_num == None:
-            if self.__run_level > 3:
-                msg = 'no value in self.__color_dict for key %s\n' % num
-                raise self.__bug_hanlder, msg
-        if hex_num == None:
+        if hex_num is None:  # number absent from the colour table: default to '0'
            hex_num = '0'
+            if self.__run_level > 5:
+                msg = 'no value in self.__color_dict ' \
+                'for key %s at line %d\n' % (num, self.__line)
+                raise self.__bug_handler, msg
        return hex_num
+
    def __do_nothing_func(self, line):
        """
        Bad RTF will have text in the color table
        """
        pass
+
    def convert_colors(self):
        """
        Requires:
@ -226,20 +238,16 @@ class Colors:
            info, and substitute the number with the hex number.
        """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module fonts.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__line += 1  # count of lines read so far, reported in error messages
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('no matching state in module colors.py\n')
+                        sys.stderr.write(self.__state + '\n')
+                    action(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "color.data")
--- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py
+++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py
@ -33,13 +33,13 @@ class ConvertToTags:
        self.__copy = copy
        self.__dtd_path = dtd_path
        self.__no_dtd = no_dtd
-        if encoding != 'mac_roman':
-            self.__encoding = 'cp' + encoding
-        else:
+        self.__encoding = 'cp' + encoding
+        if encoding == 'mac_roman':
            self.__encoding = 'mac_roman'
        self.__indent = indent
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
+        self.__convert_utf = False

    def __initiate_values(self):
        """
@ -213,7 +213,8 @@ class ConvertToTags:
        if not check_encoding_obj.check_encoding(self.__file, verbose=False):
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
-            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
+            self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
+            self.__convert_utf = True
        else:
            self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
            sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@ -253,15 +254,28 @@ class ConvertToTags:
            an empty tag function.
            """
        self.__initiate_values()
-        self.__write_obj = open(self.__write_to, 'w')
-        self.__write_dec()
-        with open(self.__file, 'r') as read_obj:
-            for line in read_obj:
-                self.__token_info = line[:16]
-                action = self.__state_dict.get(self.__token_info)
-                if action is not None:
-                    action(line)
+        with open(self.__write_to, 'w') as self.__write_obj:
+            self.__write_dec()
+            with open(self.__file, 'r') as read_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__token_info)
+                    if action is not None:
+                        action(line)
        self.__write_obj.close()
+        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
+        if self.__convert_utf:
+            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+            copy_obj.rename(self.__write_to, self.__file)
+            with open(self.__file, 'r') as read_obj:
+                data = read_obj.read()
+            try:
+                data = data.decode(self.__encoding).encode('utf-8')
+            except (UnicodeError, LookupError):  # undecodable bytes or unknown codec
+                sys.stderr.write('Conversion to UTF-8 is not possible,'
+                ' encoding should be very carefully checked')
+            with open(self.__write_to, 'w') as write_obj:
+                write_obj.write(data)  # on failure the original bytes are kept, not an empty file
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@ -75,12 +75,16 @@ class DefaultEncoding:
            self._encoding()
            self.__datafetched = True
            code_page = 'ansicpg' + self.__code_page
+            if self.__code_page == '10000':
+                self.__code_page = 'mac_roman'
        return self.__platform, code_page, self.__default_num

    def get_codepage(self):
        if not self.__datafetched:
            self._encoding()
            self.__datafetched = True
+            if self.__code_page == '10000':
+                self.__code_page = 'mac_roman'
        return self.__code_page

    def get_platform(self):
--- a/src/calibre/ebooks/rtf2xml/fonts.py
+++ b/src/calibre/ebooks/rtf2xml/fonts.py
@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class Fonts:
    """
    Change lines with font info from font numbers to the actual font names.
@ -45,6 +47,7 @@ class Fonts:
        self.__default_font_num = default_font_num
        self.__write_to = tempfile.mktemp()
        self.__run_level = run_level
+
    def __initiate_values(self):
        """
        Initiate all values.
@ -67,6 +70,7 @@ class Fonts:
        self.__font_table = {}
        # individual font written
        self.__wrote_ind_font = 0
+
    def __default_func(self, line):
        """
        Requires:
@ -79,6 +83,7 @@ class Fonts:
        if self.__token_info == 'mi<mk<fonttb-beg':
            self.__state = 'font_table'
        self.__write_obj.write(line)
+
    def __font_table_func(self, line):
        """
        Requires:
@ -101,6 +106,7 @@ class Fonts:
            self.__font_num = self.__default_font_num
            self.__text_line = ''
        ##self.__write_obj.write(line)
+
    def __font_in_table_func(self, line):
        """
        Requires:
@ -138,6 +144,7 @@ class Fonts:
        elif self.__token_info == 'mi<mk<fonttb-end':
            self.__found_end_font_table_func()
            self.__state = 'after_font_table'
+
    def __found_end_font_table_func(self):
        """
        Required:
@ -150,7 +157,8 @@ class Fonts:
        if not self.__wrote_ind_font:
            self.__write_obj.write(
            'mi<tg<empty-att_'
-            '<font-in-table<name>Times<num>0\n' )
+            '<font-in-table<name>Times<num>0\n')
+
    def __after_font_table_func(self, line):
        """
        Required:
@ -169,7 +177,7 @@ class Fonts:
        if self.__token_info == 'cw<ci<font-style':
            font_num = line[20:-1]
            font_name = self.__font_table.get(font_num)
-            if font_name == None:
+            if font_name is None:
                if self.__run_level > 3:
                    msg = 'no value for %s in self.__font_table\n' % font_num
                    raise self.__bug_handler, msg
@ -182,6 +190,7 @@ class Fonts:
                )
        else:
            self.__write_obj.write(line)
+
    def convert_fonts(self):
        """
        Required:
@ -197,20 +206,15 @@ class Fonts:
            info. Substitute a font name for a font number.
            """
        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module fonts.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('no matching state in module fonts.py\n' \
+                                            + self.__state + '\n')
+                    action(line)
        default_font_name = self.__font_table.get(self.__default_font_num)
        if not default_font_name:
            default_font_name = 'Not Defined'
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@ -43,7 +43,7 @@ class GetCharMap:
    def get_char_map(self, map):
        if map == 'ansicpg0':
            map = 'ansicpg1250'
-        if map in ('ansicpg10000', '10000'):
+        if map == 'ansicpg10000':
            map = 'mac_roman'
        found_map = False
        map_dict = {}
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@ -126,12 +126,6 @@ class Tokenize:
        tokens = re.split(self.__splitexp, input_file)
        #remove empty tokens and \n
        return filter(lambda x: len(x) > 0 and x != '\n', tokens)
-        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
-        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
-        # this is for older RTF
-        #line = re.sub(self.__par_exp, '\\par ', line)
-        #return filter(lambda x: len(x) > 0, \
-            #(self.__remove_line.sub('', x) for x in tokens)) 

    def __compile_expressions(self):
        SIMPLE_RPL = {
@ -160,7 +154,7 @@ class Tokenize:
            }
        self.__replace_spchar = MReplace(SIMPLE_RPL)
        #add ;? in case of char following \u
-        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
        self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
        #manage upr/ud situations
@ -172,14 +166,21 @@ class Tokenize:
        self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
        #this is for old RTF
        self.__par_exp = re.compile(r'\\\n+')
-        # self.__par_exp = re.compile(r'\\$')
+        #handle cw using a digit as argument and without space as delimiter
+        self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
        #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
        #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
        #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
        #self.__remove_line = re.compile(r'\n+')
-        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
        ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")

+    def __correct_spliting(self, token):
+        """Put a newline between a digit-terminated control word and glued-on text."""
+        found = self.__cwdigit_exp.search(token)
+        if found is None:
+            return token
+        return '\n'.join(found.groups())
+
    def tokenize(self):
        """Main class for handling other methods. Reads the file \
        , uses method self.sub_reg to make basic substitutions,\
@ -187,7 +188,7 @@ class Tokenize:
        #read
        with open(self.__file, 'r') as read_obj:
            input_file = read_obj.read()
-        
+
        #process simple replacements and split giving us a correct list
        #remove '' and \n in the process
        tokens = self.__sub_reg_split(input_file)
@ -195,7 +196,9 @@ class Tokenize:
        tokens = map(self.__unicode_process, tokens)
        #remove empty items created by removing \uc
        tokens = filter(lambda x: len(x) > 0, tokens)
-        
+        #handles bothersome cases
+        tokens = map(self.__correct_spliting, tokens)
+
        #write
        with open(self.__write_to, 'wb') as write_obj:
            write_obj.write('\n'.join(tokens))
@ -203,11 +206,9 @@ class Tokenize:
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "tokenize.data")
-        # if self.__out_file:
-            # self.__file = self.__out_file
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
-        
+
        #self.__special_tokens = [ '_', '~', "'", '{', '}' ]

 # import sys
@ -223,4 +224,4 @@ class Tokenize:


 # if __name__ == '__main__':
-    # sys.exit(main())
+    # sys.exit(main())
--- a/src/calibre/gui2/convert/comic_input.py
+++ b/src/calibre/gui2/convert/comic_input.py
@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
                ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
                    'despeckle', 'no_sort', 'no_process', 'landscape',
                    'dont_sharpen', 'disable_trim', 'wide', 'output_format',
-                    'dont_grayscale']
+                    'dont_grayscale', 'comic_image_size']
                )
        self.db, self.book_id = db, book_id
        for x in get_option('output_format').option.choices:
--- a/src/calibre/gui2/convert/comic_input.ui
+++ b/src/calibre/gui2/convert/comic_input.ui
@ -7,7 +7,7 @@
    <x>0</x>
    <y>0</y>
    <width>599</width>
-    <height>345</height>
+    <height>398</height>
   </rect>
  </property>
  <property name="windowTitle">
@ -37,70 +37,70 @@
     </property>
    </widget>
   </item>
-   <item row="3" column="0">
+   <item row="4" column="0">
    <widget class="QCheckBox" name="opt_dont_normalize">
     <property name="text">
      <string>Disable &amp;normalize</string>
     </property>
    </widget>
   </item>
-   <item row="4" column="0">
+   <item row="5" column="0">
    <widget class="QCheckBox" name="opt_keep_aspect_ratio">
     <property name="text">
      <string>Keep &amp;aspect ratio</string>
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
+   <item row="6" column="0">
    <widget class="QCheckBox" name="opt_dont_sharpen">
     <property name="text">
      <string>Disable &amp;Sharpening</string>
     </property>
    </widget>
   </item>
-   <item row="6" column="0">
+   <item row="7" column="0">
    <widget class="QCheckBox" name="opt_disable_trim">
     <property name="text">
      <string>Disable &amp;Trimming</string>
     </property>
    </widget>
   </item>
-   <item row="7" column="0">
+   <item row="8" column="0">
    <widget class="QCheckBox" name="opt_wide">
     <property name="text">
      <string>&amp;Wide</string>
     </property>
    </widget>
   </item>
-   <item row="8" column="0">
+   <item row="9" column="0">
    <widget class="QCheckBox" name="opt_landscape">
     <property name="text">
      <string>&amp;Landscape</string>
     </property>
    </widget>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <widget class="QCheckBox" name="opt_right2left">
     <property name="text">
      <string>&amp;Right to left</string>
     </property>
    </widget>
   </item>
-   <item row="10" column="0">
+   <item row="11" column="0">
    <widget class="QCheckBox" name="opt_no_sort">
     <property name="text">
      <string>Don't so&amp;rt</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0">
+   <item row="12" column="0">
    <widget class="QCheckBox" name="opt_despeckle">
     <property name="text">
      <string>De&amp;speckle</string>
     </property>
    </widget>
   </item>
-   <item row="13" column="0">
+   <item row="14" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
     </property>
    </widget>
   </item>
-   <item row="12" column="0">
+   <item row="13" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>&amp;Output format:</string>
@ -130,7 +130,7 @@
     </property>
    </widget>
   </item>
-   <item row="12" column="1">
+   <item row="13" column="1">
    <widget class="QComboBox" name="opt_output_format"/>
   </item>
   <item row="1" column="0">
@ -140,6 +140,19 @@
     </property>
    </widget>
   </item>
+   <item row="3" column="0">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>Override image &amp;size:</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_comic_image_size</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="1">
+    <widget class="QLineEdit" name="opt_comic_image_size"/>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -838,9 +838,9 @@ class DeviceMixin(object): # {{{
                            format_count[f] = 1
            for f in self.device_manager.device.settings().format_map:
                if f in format_count.keys():
-                    formats.append((f, _('%i of %i Books' % (format_count[f], len(rows))), True if f in aval_out_formats else False))
+                    formats.append((f, _('%i of %i Books') % (format_count[f], len(rows)), True if f in aval_out_formats else False))
                elif f in aval_out_formats:
-                    formats.append((f, _('0 of %i Books' % len(rows)), True))
+                    formats.append((f, _('0 of %i Books') % len(rows), True))
            d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
            if d.exec_() != QDialog.Accepted:
                return
--- a/src/calibre/gui2/dialogs/check_library.py
+++ b/src/calibre/gui2/dialogs/check_library.py
@ -7,7 +7,7 @@ import os, shutil

 from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
            QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
-            QLineEdit, Qt, QProgressBar, QSize, QTimer
+            QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit

 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.library.check_library import CheckLibrary, CHECKS
@ -16,7 +16,7 @@ from calibre import prints, as_unicode
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.library.sqlite import DBThread, OperationalError

-class DBCheck(QDialog):
+class DBCheck(QDialog): # {{{

    def __init__(self, parent, db):
        QDialog.__init__(self, parent)
@ -134,7 +134,7 @@ class DBCheck(QDialog):
    def reject(self):
        self.rejected = True
        QDialog.reject(self)
-
+# }}}

 class Item(QTreeWidgetItem):
    pass
@ -146,9 +146,70 @@ class CheckLibraryDialog(QDialog):
        self.db = db

        self.setWindowTitle(_('Check Library -- Problems Found'))
+        self.setWindowIcon(QIcon(I('debug.png')))

-        self._layout = QVBoxLayout(self)
-        self.setLayout(self._layout)
+        self._tl = QHBoxLayout()
+        self._layout = QVBoxLayout()
+        self.setLayout(self._tl)
+        self._tl.addLayout(self._layout)
+        self.helpw = QTextEdit(self)
+        self._tl.addWidget(self.helpw)
+        self.helpw.setReadOnly(True)
+        self.helpw.setText(_('''\
+        <h1>Help</h1>
+
+        <p>calibre stores the list of your books and their metadata in a
+        database. The actual book files and covers are stored as normal
+        files in the calibre library folder. The database contains a list of the files
+        and covers belonging to each book entry. This tool checks that the
+        actual files in the library folder on your computer match the
+        information in the database.</p>
+
+        <p>The result of each type of check is shown to the left. The various
+        checks are:
+        </p>
+        <ul>
+        <li><b>Invalid titles</b>: These are files and folders appearing
+        in the library where books titles should, but that do not have the
+        correct form to be a book title.</li>
+        <li><b>Extra titles</b>: These are extra files in your calibre
+        library that appear to be correctly-formed titles, but have no corresponding
+        entries in the database</li>
+        <li><b>Invalid authors</b>: These are files appearing
+        in the library where only author folders should be.</li>
+        <li><b>Extra authors</b>: These are folders in the
+        calibre library that appear to be authors but that do not have entries
+        in the database</li>
+        <li><b>Missing book formats</b>: These are book formats that are in
+        the database but have no corresponding format file in the book's folder.</li>
+        <li><b>Extra book formats</b>: These are book format files found in
+        the book's folder but not in the database.</li>
+        <li><b>Unknown files in books</b>: These are extra files in the
+        folder of each book that do not correspond to a known format or cover
+        file.</li>
+        <li><b>Missing cover files</b>: These represent books that are marked
+        in the database as having covers but the actual cover files are
+        missing.</li>
+        <li><b>Cover files not in database</b>: These are books that have
+        cover files but are marked as not having covers in the database.</li>
+        <li><b>Folder raising exception</b>: These represent folders in the
+        calibre library that could not be processed/understood by this
+        tool.</li>
+        </ul>
+
+        <p>There are two kinds of automatic fixes possible: <i>Delete
+        marked</i> and <i>Fix marked</i>.</p>
+        <p><i>Delete marked</i> is used to remove extra files/folders/covers that
+        have no entries in the database. Check the box next to the item you want
+        to delete. Use with caution.</p>
+        <p><i>Fix marked</i> is applicable only to covers (the two lines marked
+        'fixable'). In the case of missing cover files, checking the fixable
+        box and pushing this button will remove the cover mark from the
+        database for all the files in that category. In the case of extra
+        cover files, checking the fixable box and pushing this button will
+        add the cover mark to the database for all the files in that
+        category.</p>
+        '''))

        self.log = QTreeWidget(self)
        self.log.itemChanged.connect(self.item_changed)
@ -199,7 +260,7 @@ class CheckLibraryDialog(QDialog):
        self._layout.addLayout(h)

        self._layout.addWidget(self.bbox)
-        self.resize(750, 500)
+        self.resize(950, 500)
        self.bbox.setEnabled(True)

    def do_exec(self):
@ -347,5 +408,6 @@ class CheckLibraryDialog(QDialog):

 if __name__ == '__main__':
    app = QApplication([])
-    d = CheckLibraryDialog()
+    from calibre.library import db
+    d = CheckLibraryDialog(None, db())
    d.exec_()
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@ -266,7 +266,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):

    def add_plugin(self):
        path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
-                filters=[(_('Plugins'), ['zip'])], all_files=False,
+                filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
                    select_only_single_file=True)
        if not path:
            return
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
                help = _('The fields to output when cataloging books in the '
                    'database.  Should be a comma-separated list of fields.\n'
                    'Available fields: %s.\n'
+                    'plus user-created custom fields.\n'
                    'Example: %s=title,authors,tags\n'
                    "Default: '%%default'\n"
                    "Applies to: BIBTEX output format")%(', '.join(FIELDS),
@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
                dest = 'bib_cit',
                action = None,
                help = _('The template for citation creation from database fields.\n'
-                    ' Should be a template with {} enclosed fields.\n'
+                    'Should be a template with {} enclosed fields.\n'
                    'Available fields: %s.\n'
                    "Default: '%%default'\n"
                    "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
                if field == 'authors' :
                    bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))

-                elif field in ['title', 'publisher', 'cover', 'uuid',
+                elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                        'author_sort', 'series'] :
                    bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
                    if calibre_files:
                        files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
                            for format in item]
-                        bibtex_entry.append(u'files = "%s"' % u', '.join(files))
+                        bibtex_entry.append(u'file = "%s"' % u', '.join(files))

                elif field == 'series_index' :
                    bibtex_entry.append(u'volume = "%s"' % int(item))
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
        if opts.verbose:
            opts_dict = vars(opts)
            log("%s(): Generating %s" % (self.name,self.fmt))
+            if opts.connected_device['is_device_connected']:
+                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
            as outfile:
            #File header
            nb_entries = len(data)
+
            #check in book strict if all is ok else throw a warning into log
            if bib_entry == 'book' :
                nb_books = len(filter(check_entry_book_valid, data))
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
                    log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
                    nb_entries = nb_books

+            # If connected device, add 'On Device' values to data
+            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
+                for entry in data:
+                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
+
            outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
            outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@ -112,6 +112,16 @@ _extra_lang_codes = {
        'en_IE' : _('English (Ireland)'),
        'en_CN' : _('English (China)'),
        'es_PY' : _('Spanish (Paraguay)'),
+        'es_UY' : _('Spanish (Uruguay)'),
+        'es_AR' : _('Spanish (Argentina)'),
+        'es_MX' : _('Spanish (Mexico)'),
+        'es_CU' : _('Spanish (Cuba)'),
+        'es_CL' : _('Spanish (Chile)'),
+        'es_EC' : _('Spanish (Ecuador)'),
+        'es_HN' : _('Spanish (Honduras)'),
+        'es_VE' : _('Spanish (Venezuela)'),
+        'es_BO' : _('Spanish (Bolivia)'),
+        'es_NI' : _('Spanish (Nicaragua)'),
        'de_AT' : _('German (AT)'),
        'fr_BE' : _('French (BE)'),
        'nl'    : _('Dutch (NL)'),