Fix #5023 (Updated recipes for Chilean newspapers)

2025-07-09 03:04:10 -04:00 · 2010-02-28 10:58:19 -07:00 · 2010-02-28 10:58:19 -07:00 · da8bfe6b02
commit da8bfe6b02
parent 83feb86713
3 changed files with 49 additions and 62 deletions
--- a/resources/recipes/la_segunda.recipe
+++ b/resources/recipes/la_segunda.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lasegunda.com
 '''
@@ -19,21 +17,23 @@ class LaSegunda(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
-    cover_url             = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
-    remove_javascript     = True
+    masthead_url          = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
+    remove_empty_feeds    = True
    language              = 'es'
+    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '            
    
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+						, 'linearize_tables' : True
+                        }
                        
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
+    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
+    remove_tags        = [dict(name='img')]
+    remove_attributes  = ['width','height']
 	
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' 
-                        
-    keep_only_tags = [dict(name='table')]
                        
    feeds = [ 
               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
@@ -52,10 +52,3 @@ class LaSegunda(BasicNewsRecipe):
        rest, sep, article_id = url.partition('index.asp?idnoticia=')        
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
    
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
-    
--- a/resources/recipes/la_tercera.recipe
+++ b/resources/recipes/la_tercera.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 latercera.com
 '''
@@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
-    remove_javascript     = True
    use_embedded_content  = False
+    remove_empty_feeds    = True
+    language              = 'es'
 
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+						, 'linearize_tables' : True
+                        }

    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]

    remove_tags = [
-                     dict(name='script')
-                    ,dict(name='ul')
+                     dict(name=['ul','input','base'])
                    ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
                    ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
-                    ,dict(name='input')
                    ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
                  ]


    feeds = [
               (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
-              ,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
+              ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
+              ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')              
              ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
              ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
              ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
@@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
            ]

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
-        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
-
-    language = 'es'
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -398,16 +398,6 @@ class Device(DeviceConfig, DevicePlugin):
        if len(matches) > 2:
            drives['cardb'] = matches[2]

-        pat = self.OSX_MAIN_MEM_VOL_PAT
-        if pat is not None and len(drives) > 1 and 'main' in drives:
-            if pat.search(drives['main']) is None:
-                main = drives['main']
-                for x in ('carda', 'cardb'):
-                    if x in drives and pat.search(drives[x]):
-                        drives['main'] = drives.pop(x)
-                        drives[x] = main
-                        break
-
        return drives

    def osx_bsd_names(self):
@@ -431,6 +421,16 @@ class Device(DeviceConfig, DevicePlugin):
        if drives['main'] is None:
            print bsd_drives, mount_map, drives
            raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
+        pat = self.OSX_MAIN_MEM_VOL_PAT
+        if pat is not None and len(drives) > 1 and 'main' in drives:
+            if pat.search(drives['main']) is None:
+                main = drives['main']
+                for x in ('carda', 'cardb'):
+                    if x in drives and pat.search(drives[x]):
+                        drives['main'] = drives.pop(x)
+                        drives[x] = main
+                        break
+
        self._main_prefix = drives['main']+os.sep
        def get_card_prefix(c):
            ans = drives.get(c, None)