Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-09-10 08:42:19 +01:00 · 2010-09-10 08:42:19 +01:00 · 6fa4ce8603
commit 6fa4ce8603
parent f51d0a1c31 6e24780d47
4 changed files with 73 additions and 50 deletions
--- a/resources/recipes/el_pais.recipe
+++ b/resources/recipes/el_pais.recipe
@@ -1,8 +1,8 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__author__    = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal'
+__author__    = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-description   = 'Main daily newspaper from Spain - v1.02 (10, January 2010)'
+description   = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
 __docformat__ = 'restructuredtext en'

 '''
@@ -12,12 +12,12 @@ elpais.es
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElPais(BasicNewsRecipe):
-    __author__        = 'Kovid Goyal & Lorenzo Vigentini'
+    __author__        = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
    description   = 'Main daily newspaper from Spain'

    cover_url      = 'http://www.elpais.com/im/tit_logo_global.gif'
    title          = u'El Pais'
-    publisher      = 'Ediciones El Pais SL'
+    publisher      = u'Ediciones El Pa\xeds SL'
    category       = 'News, politics, culture, economy, general interest'

    language       = 'es'
@@ -32,7 +32,8 @@ class ElPais(BasicNewsRecipe):
    remove_javascript = True
    no_stylesheets = True

-    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})]
+    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]
+	
    extra_css      = '''
                        p{style:normal size:12 serif}

@@ -40,25 +41,29 @@ class ElPais(BasicNewsRecipe):

    remove_tags    = [
                        dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
-                        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}),
-                        dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']})
+                        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
+                        dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
+                        dict(name='p', attrs={'class':'nav_meses'}),
+                        dict(attrs={'class':['enlaces_m','miniaturas_m']})
                    ]

    feeds          = [
                        (u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
                        (u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
-                        (u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
+                        (u'Espa\xf1a', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
                        (u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
-                        (u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
-                        (u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
-                        (u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
+                        (u'Econom\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
+                        (u'Pol\xedtica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
+                        (u'Tecnolog\xeda', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
                        (u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
                        (u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
                        (u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
-                        (u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
+                        (u'Opini\xf3n', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
                        (u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
                        (u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
-                    ]
+                        (u'Medio ambiente', u'http://www.elpais.com/rss/feed.html?feedId=17071'),
+                        (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
+                        ]

 def print_version(self, url):
    url = url+'?print=1'
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -2488,7 +2488,8 @@ class ITUNES(DriverBase):
            zf_opf.close()

            # If 'News' in tags, tweak the title/author for friendlier display in iBooks
-            if _('News') or _('Catalog') in metadata.tags:
+            if _('News') in metadata.tags or \
+               _('Catalog') in metadata.tags:
                if metadata.title.find('[') > 0:
                    metadata.title = metadata.title[:metadata.title.find('[')-1]
                date_as_author = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -1308,35 +1308,44 @@ class DeviceMixin(object): # {{{
    def book_on_device(self, id, format=None, reset=False):
        '''
        Return an indication of whether the given book represented by its db id
-        is on the currently connected device. It returns a 4 element list. The
+        is on the currently connected device. It returns a 6 element list. The
        first three elements represent memory locations main, carda, and cardb,
        and are true if the book is identifiably in that memory. The fourth
-        is the a count of how many instances of the book were found across all
-        the memory locations.
+        is a count of how many instances of the book were found across all
+        the memory locations. The fifth is the type of match. The type can be
+        one of: None, 'uuid', 'db_id', 'metadata'. The sixth is a set of paths to the
+        matching books on the device.
        '''
-        loc = [None, None, None, 0]
+        loc = [None, None, None, 0, None, set([])]

        if reset:
            self.book_db_title_cache = None
            self.book_db_uuid_cache = None
            self.book_db_id_counts = None
+            self.book_db_uuid_path_map = None
            return

+        string_pat = re.compile('(?u)\W|[_]')
+        def clean_string(x):
+            x = x.lower() if x else ''
+            return string_pat.sub('', x)
+
        if self.book_db_title_cache is None:
            self.book_db_title_cache = []
            self.book_db_uuid_cache = []
+            self.book_db_uuid_path_map = {}
            self.book_db_id_counts = {}
            for i, l in enumerate(self.booklists()):
                self.book_db_title_cache.append({})
                self.book_db_uuid_cache.append(set())
                for book in l:
-                    book_title = book.title.lower() if book.title else ''
-                    book_title = re.sub('(?u)\W|[_]', '', book_title)
+                    book_title = clean_string(book.title)
                    if book_title not in self.book_db_title_cache[i]:
                        self.book_db_title_cache[i][book_title] = \
-                                {'authors':set(), 'db_ids':set(), 'uuids':set()}
-                    book_authors = authors_to_string(book.authors).lower()
-                    book_authors = re.sub('(?u)\W|[_]', '', book_authors)
+                                {'authors':set(), 'db_ids':set(),
+                                 'uuids':set(), 'paths':set(),
+                                 'uuid_in_library':False}
+                    book_authors = clean_string(authors_to_string(book.authors))
                    self.book_db_title_cache[i][book_title]['authors'].add(book_authors)
                    db_id = getattr(book, 'application_id', None)
                    if db_id is None:
@@ -1350,32 +1359,39 @@ class DeviceMixin(object): # {{{
                    uuid = getattr(book, 'uuid', None)
                    if uuid is not None:
                        self.book_db_uuid_cache[i].add(uuid)
+                        self.book_db_uuid_path_map[uuid] = book.path
+                        if uuid in self.db_book_uuid_cache:
+                            self.book_db_title_cache[i][book_title]\
+                                    ['uuid_in_library'] = True
+                    self.book_db_title_cache[i][book_title]['paths'].add(book.path)

        mi = self.library_view.model().db.get_metadata(id, index_is_id=True)
        for i, l in enumerate(self.booklists()):
            if mi.uuid in self.book_db_uuid_cache[i]:
                loc[i] = True
+                loc[4] = 'uuid'
+                loc[5].add(self.book_db_uuid_path_map[mi.uuid])
                continue
-            db_title = re.sub('(?u)\W|[_]', '', mi.title.lower())
+            db_title = clean_string(mi.title)
            cache = self.book_db_title_cache[i].get(db_title, None)
-            if cache:
+            if cache and not cache['uuid_in_library']:
                if id in cache['db_ids']:
                    loc[i] = True
-                    continue
-                if mi.authors and \
-                        re.sub('(?u)\W|[_]', '', authors_to_string(mi.authors).lower()) \
-                        in cache['authors']:
-                    # If we get here, then two library books have the same title
-                    # and author. This can happen, especially in the case of
-                    # news. Mark a match and go on.
-                    loc[i] = True
+                    loc[4] = 'db_id'
+                    loc[5] = cache['paths']
                    continue
                # Also check author sort, because it can be used as author in
                # some formats
-                if mi.author_sort and \
-                        re.sub('(?u)\W|[_]', '', mi.author_sort.lower()) \
-                        in cache['authors']:
+                if (mi.authors and clean_string(authors_to_string(mi.authors))
+                        in cache['authors']) or (mi.author_sort and
+                        clean_string(mi.author_sort) in cache['authors']):
+                    # We really shouldn't get here, because set_books_in_library
+                    # should have set the db_ids for the books, and therefore
+                    # the if just above should have found them. Mark the book
+                    # anyway, and print a message about the situation
                    loc[i] = True
+                    loc[4] = 'metadata'
+                    loc[5] = cache['paths']
                    continue
        loc[3] = self.book_db_id_counts.get(id, 0)
        return loc
@@ -1387,31 +1403,34 @@ class DeviceMixin(object): # {{{
        it sets the application_id for matched books. Book_on_device uses that
        to both speed up matching and to count matches.
        '''
+
+        string_pat = re.compile('(?u)\W|[_]')
+        def clean_string(x):
+            x = x.lower() if x else ''
+            return string_pat.sub('', x)
+
        # Force a reset if the caches are not initialized
        if reset or not hasattr(self, 'db_book_title_cache'):
            # It might be possible to get here without having initialized the
            # library view. In this case, simply give up
-            if not hasattr(self, 'library_view') or self.library_view is None:
-                return
-            db = getattr(self.library_view.model(), 'db', None)
-            if db is None:
+            try:
+                db = self.library_view.model().db
+            except:
                return
            # Build a cache (map) of the library, so the search isn't On**2
            self.db_book_title_cache = {}
            self.db_book_uuid_cache = {}
            for id in db.data.iterallids():
                mi = db.get_metadata(id, index_is_id=True)
-                title = re.sub('(?u)\W|[_]', '', mi.title.lower())
+                title = clean_string(mi.title)
                if title not in self.db_book_title_cache:
                    self.db_book_title_cache[title] = \
                                {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                if mi.authors:
-                    authors = authors_to_string(mi.authors).lower()
-                    authors = re.sub('(?u)\W|[_]', '', authors)
+                    authors = clean_string(authors_to_string(mi.authors))
                    self.db_book_title_cache[title]['authors'][authors] = mi
                if mi.author_sort:
-                    aus = mi.author_sort.lower()
-                    aus = re.sub('(?u)\W|[_]', '', aus)
+                    aus = clean_string(mi.author_sort)
                    self.db_book_title_cache[title]['author_sort'][aus] = mi
                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
                self.db_book_uuid_cache[mi.uuid] = mi
@@ -1437,8 +1456,7 @@ class DeviceMixin(object): # {{{
                        self.db_book_uuid_cache[book.uuid].application_id
                    continue

-                book_title = book.title.lower() if book.title else ''
-                book_title = re.sub('(?u)\W|[_]', '', book_title)
+                book_title = clean_string(book.title)
                book.in_library = None
                d = self.db_book_title_cache.get(book_title, None)
                if d is not None:
@@ -1460,8 +1478,7 @@ class DeviceMixin(object): # {{{
                    if book.authors:
                        # Compare against both author and author sort, because
                        # either can appear as the author
-                        book_authors = authors_to_string(book.authors).lower()
-                        book_authors = re.sub('(?u)\W|[_]', '', book_authors)
+                        book_authors = clean_string(authors_to_string(book.authors))
                        if book_authors in d['authors']:
                            book.in_library = True
                            book.application_id = \
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -641,7 +641,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        count = 0
        on = self.book_on_device(id)
        if on is not None:
-            m, a, b, count = on
+            m, a, b, count = on[:4]
            if m is not None:
                loc.append(_('Main'))
            if a is not None: